[Thumb] Add support for tMUL in the compare instruction peephole optimizer.

We also want to optimise tests like this: return a*b == 0. The MULS instruction is flag setting, so we don't need the CMP instruction but can instead branch on the result of the MULS. The generated instruction sequence for this example was: MULS, MOVS, MOVS, CMP. The MOVS instructions load the boolean values resulting from the select instruction, but these MOVS instructions are flag setting and were thus preventing this optimisation. Now we first move the MULS to just before the CMP, generating the sequence MOVS, MOVS, MULS, CMP, so that the optimisation can trigger. Reordering the MULS and MOVS is safe because the subsequent MOVS instructions only set the CPSR register and don't use it, i.e. the CPSR is dead. Differential Revision: https://reviews.llvm.org/D27990 llvm-svn: 292608
author: Sjoerd Meijer <sjoerd.meijer@arm.com> 2017-01-20 13:10:12 +0000
committer: Sjoerd Meijer <sjoerd.meijer@arm.com> 2017-01-20 13:10:12 +0000
commit: 2db2a947f64b9291d6ef445e6df272ae159e0e21 (patch)
tree: 9b60fe551acb1764e743931f8ee8f5495a2aac22 /llvm/test/CodeGen
parent: 78bfa04dbe6ab63b19b667f5d0348ad589f7249b (diff)
download: bcm5719-llvm-2db2a947f64b9291d6ef445e6df272ae159e0e21.tar.gz
bcm5719-llvm-2db2a947f64b9291d6ef445e6df272ae159e0e21.zip
2 files changed, 186 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/ARM/cmp1-peephole-thumb.mir b/llvm/test/CodeGen/ARM/cmp1-peephole-thumb.mir
new file mode 100644
index 00000000000..5ace58fd065
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmp1-peephole-thumb.mir
@@ -0,0 +1,78 @@
+# RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s
+
+--- |
+  ; ModuleID = '<stdin>'
+  source_filename = "<stdin>"
+  target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumb-none--eabi"
+
+  define i32 @f(i32 %a, i32 %b) {                ; IR for: return a*b == 0
+  entry:
+    %mul = mul nsw i32 %b, %a
+    %cmp = icmp eq i32 %mul, 0                   ; test the product against zero
+    %conv = zext i1 %cmp to i32                  ; widen the i1 result to the i32 return type
+    ret i32 %conv
+  }
+
+...
+---
+name:            f
+# CHECK-LABEL: name: f
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: tgpr }
+  - { id: 1, class: tgpr }
+  - { id: 2, class: tgpr }
+  - { id: 3, class: tgpr }
+  - { id: 4, class: tgpr }
+  - { id: 5, class: tgpr }
+liveins:
+  - { reg: '%r0', virtual-reg: '%0' }
+  - { reg: '%r1', virtual-reg: '%1' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+
+# CHECK:  tMOVi8 1, 14, _
+# CHECK:  tMOVi8 0, 14, _
+# CHECK:  tMUL %1, %0, 14, _
+# CHECK-NOT: tCMPi8
+body:             |
+  bb.0.entry:
+    successors: %bb.1.entry(0x40000000), %bb.2.entry(0x40000000)
+    liveins: %r0, %r1
+
+    %1 = COPY %r1
+    %0 = COPY %r0
+    %2, %cpsr = tMUL %1, %0, 14, _                     ; multiply also defines %cpsr
+    %3, %cpsr = tMOVi8 1, 14, _                        ; redefines %cpsr between the tMUL and the tCMPi8
+    %4, %cpsr = tMOVi8 0, 14, _                        ; likewise; %3 and %4 feed the PHI in bb.2
+    tCMPi8 killed %2, 0, 14, _, implicit-def %cpsr     ; compares the product with 0; CHECK-NOT expects it to be gone
+    tBcc %bb.2.entry, 0, %cpsr                         ; conditional branch on the flags in %cpsr
+
+  bb.1.entry:
+    successors: %bb.2.entry(0x80000000)
+
+
+  bb.2.entry:
+    %5 = PHI %4, %bb.1.entry, %3, %bb.0.entry          ; 0 when reached via bb.1, 1 when reached directly from bb.0
+    %r0 = COPY %5
+    tBX_RET 14, _, implicit %r0
+
+...
diff --git a/llvm/test/CodeGen/ARM/cmp2-peephole-thumb.mir b/llvm/test/CodeGen/ARM/cmp2-peephole-thumb.mir
new file mode 100644
index 00000000000..6e9ca70f174
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmp2-peephole-thumb.mir
@@ -0,0 +1,108 @@
+# RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s
+
+# Here we check that the peephole cmp rewrite is not triggered, because
+# there is a store instruction between the tMUL and tCMP, i.e. there are
+# no constants to reorder.
+
+--- |
+  ; ModuleID = 'cmp2-peephole-thumb.ll'
+  source_filename = "<stdin>"
+  target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumb-none--eabi"
+
+  define i32 @g(i32 %a, i32 %b) {                   ; returns 42 if a*b <= 0 (signed), otherwise 1
+  entry:
+    %retval = alloca i32, align 4
+    %mul = alloca i32, align 4
+    %mul1 = mul nsw i32 %a, %b
+    store i32 %mul1, i32* %mul, align 4             ; product goes through a stack slot before the compare
+    %0 = load i32, i32* %mul, align 4
+    %cmp = icmp sle i32 %0, 0                       ; signed "less than or equal to zero" test
+    br i1 %cmp, label %if.then, label %if.end
+
+  if.then:                                          ; preds = %entry
+    store i32 42, i32* %retval, align 4
+    br label %return
+
+  if.end:                                           ; preds = %entry
+    store i32 1, i32* %retval, align 4
+    br label %return
+
+  return:                                           ; preds = %if.end, %if.then
+    %1 = load i32, i32* %retval, align 4
+    ret i32 %1
+  }
+
+...
+---
+name:            g
+# CHECK-LABEL: name: g
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: tgpr }
+  - { id: 1, class: tgpr }
+  - { id: 2, class: tgpr }
+  - { id: 3, class: tgpr }
+  - { id: 4, class: tgpr }
+  - { id: 5, class: tgpr }
+liveins:
+  - { reg: '%r0', virtual-reg: '%0' }
+  - { reg: '%r1', virtual-reg: '%1' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+stack:
+  - { id: 0, name: retval, offset: 0, size: 4, alignment: 4, local-offset: -4 }
+  - { id: 1, name: mul, offset: 0, size: 4, alignment: 4, local-offset: -8 }
+
+# CHECK: tMUL
+# CHECK-NEXT: tSTRspi
+# CHECK-NEXT: tCMPi8
+body:             |
+  bb.0.entry:
+    successors: %bb.1.if.then(0x40000000), %bb.2.if.end(0x40000000)
+    liveins: %r0, %r1
+
+    %1 = COPY %r1
+    %0 = COPY %r0
+    %2, %cpsr = tMUL %0, %1, 14, _                     ; multiply also defines %cpsr
+    tSTRspi %2, %stack.1.mul, 0, 14, _ :: (store 4 into %ir.mul)
+    tCMPi8 %2, 0, 14, _, implicit-def %cpsr            ; must remain: CHECK-NEXT expects tMUL, tSTRspi, tCMPi8 in this order
+    tBcc %bb.2.if.end, 12, %cpsr                       ; conditional branch on the flags set by the tCMPi8
+    tB %bb.1.if.then, 14, _
+
+  bb.1.if.then:
+    successors: %bb.3.return(0x80000000)
+
+    %4, %cpsr = tMOVi8 42, 14, _                       ; value stored to retval on the if.then path
+    tSTRspi killed %4, %stack.0.retval, 0, 14, _ :: (store 4 into %ir.retval)
+    tB %bb.3.return, 14, _
+
+  bb.2.if.end:
+    successors: %bb.3.return(0x80000000)
+
+    %3, %cpsr = tMOVi8 1, 14, _                        ; value stored to retval on the if.end path
+    tSTRspi killed %3, %stack.0.retval, 0, 14, _ :: (store 4 into %ir.retval)
+
+  bb.3.return:
+    %5 = tLDRspi %stack.0.retval, 0, 14, _ :: (dereferenceable load 4 from %ir.retval)
+    %r0 = COPY %5                                      ; reload retval into the return register
+    tBX_RET 14, _, implicit %r0
+
+...
author	Sjoerd Meijer <sjoerd.meijer@arm.com>	2017-01-20 13:10:12 +0000
committer	Sjoerd Meijer <sjoerd.meijer@arm.com>	2017-01-20 13:10:12 +0000
commit	2db2a947f64b9291d6ef445e6df272ae159e0e21 (patch)
tree	9b60fe551acb1764e743931f8ee8f5495a2aac22 /llvm/test/CodeGen
parent	78bfa04dbe6ab63b19b667f5d0348ad589f7249b (diff)
download	bcm5719-llvm-2db2a947f64b9291d6ef445e6df272ae159e0e21.tar.gz bcm5719-llvm-2db2a947f64b9291d6ef445e6df272ae159e0e21.zip