diff options
| author | Sjoerd Meijer <sjoerd.meijer@arm.com> | 2016-10-03 10:12:32 +0000 |
|---|---|---|
| committer | Sjoerd Meijer <sjoerd.meijer@arm.com> | 2016-10-03 10:12:32 +0000 |
| commit | 4dbe73c1ed74352899eedf2a07c202ea76c2f65b (patch) | |
| tree | 3251c394733128278858fd939bc79cc2a8f8bd0a /llvm/test/CodeGen/ARM/urem-opt-size.ll | |
| parent | 2c0a7f081a8a681f29e1fc4ad115e77a9ade0f64 (diff) | |
| download | bcm5719-llvm-4dbe73c1ed74352899eedf2a07c202ea76c2f65b.tar.gz bcm5719-llvm-4dbe73c1ed74352899eedf2a07c202ea76c2f65b.zip | |
[ARM] Code size optimisation to lower udiv+urem to udiv+mls instead of a
library call to __aeabi_uidivmod. This is an improved implementation of
r280808, see also D24133, that got reverted because isel was stuck in a loop.
That was caused by the optimisation incorrectly triggering on i64 ints, which
shouldn't happen because there is no 64-bit hwdiv support; that put isel's type
legalization and this optimisation in a loop. A native ARM compiler and testing
now show that this is fixed.
Patch mostly by Pablo Barrio.
Differential Revision: https://reviews.llvm.org/D25077
llvm-svn: 283098
Diffstat (limited to 'llvm/test/CodeGen/ARM/urem-opt-size.ll')
| -rw-r--r-- | llvm/test/CodeGen/ARM/urem-opt-size.ll | 72 |
1 files changed, 72 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/urem-opt-size.ll b/llvm/test/CodeGen/ARM/urem-opt-size.ll index 7f1cd43bc4e..bcc53604959 100644 --- a/llvm/test/CodeGen/ARM/urem-opt-size.ll +++ b/llvm/test/CodeGen/ARM/urem-opt-size.ll @@ -3,7 +3,12 @@ ; expanded to a sequence of umull, lsrs, muls and sub instructions, but ; just a call to __aeabi_uidivmod. ; +; When the processor features hardware division, UDIV + UREM can be turned +; into UDIV + MLS. This prevents the library function __aeabi_uidivmod to be +; pulled into the binary. The test uses ARMv7-M. +; ; RUN: llc -mtriple=armv7a-eabi -mattr=-neon -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7m-eabi -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=V7M target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv7m-arm-none-eabi" @@ -28,11 +33,16 @@ entry: ret i32 %div } +; Test for unsigned remainder define i32 @foo3() local_unnamed_addr #0 { entry: ; CHECK-LABEL: foo3: ; CHECK: __aeabi_uidivmod ; CHECK-NOT: umull +; V7M-LABEL: foo3: +; V7M: udiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]] +; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]] +; V7M-NOT: __aeabi_uidivmod %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)() %rem = urem i32 %call, 1000000 %cmp = icmp eq i32 %rem, 0 @@ -40,6 +50,68 @@ entry: ret i32 %conv } +; Test for signed remainder +define i32 @foo4() local_unnamed_addr #0 { +entry: +; CHECK-LABEL: foo4: +; CHECK:__aeabi_idivmod +; V7M-LABEL: foo4: +; V7M: sdiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]] +; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]] +; V7M-NOT: __aeabi_idivmod + %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)() + %rem = srem i32 %call, 1000000 + ret i32 %rem +} + +; Check that doing a sdiv+srem has the same effect as only the srem, +; as the division needs to be computed anyway in order to calculate +; the remainder (i.e. make sure we don't end up with two divisions). 
+define i32 @foo5() local_unnamed_addr #0 { +entry: +; CHECK-LABEL: foo5: +; CHECK:__aeabi_idivmod +; V7M-LABEL: foo5: +; V7M: sdiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]] +; V7M-NOT: sdiv +; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]] +; V7M-NOT: __aeabi_idivmod + %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)() + %div = sdiv i32 %call, 1000000 + %rem = srem i32 %call, 1000000 + %add = add i32 %div, %rem + ret i32 %add +} + +; An early version of this patch caused isel to hang. The reason +; was that it shouldn't do the rewrite for i64 because that's not +; supported by hardware. Isel was stuck in a loop with type +; legalization and this optimisation. +; Function Attrs: norecurse nounwind +define i64 @isel_dont_hang(i32 %bar) local_unnamed_addr #4 { +entry: +; CHECK-LABEL: isel_dont_hang: +; CHECK: __aeabi_uldivmod + %temp.0 = sext i32 %bar to i64 + %mul83 = shl i64 %temp.0, 1 + %add84 = add i64 %temp.0, 2 + %div85 = udiv i64 %mul83, %add84 + ret i64 %div85 +} + +; i16 types are promoted to i32, and we expect a normal udiv here: +define i16 @isel_dont_hang_2(i16 %bar) local_unnamed_addr #4 { +entry: +; CHECK-LABEL: isel_dont_hang_2: +; CHECK: udiv +; CHECK-NOT: __aeabi_ + %mul83 = shl i16 %bar, 1 + %add84 = add i16 %bar, 2 + %div85 = udiv i16 %mul83, %add84 + ret i16 %div85 +} declare i32 @GetValue(...) local_unnamed_addr attributes #0 = { minsize nounwind optsize } +attributes #4 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-jump-tables"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a15" "target-features"="+dsp,+hwdiv,+hwdiv-arm,+neon,+vfp4" "use-soft-float"="false" } + |

