diff options
author | Jonathan Roelofs <jonathan@codesourcery.com> | 2014-08-20 23:38:50 +0000 |
---|---|---|
committer | Jonathan Roelofs <jonathan@codesourcery.com> | 2014-08-20 23:38:50 +0000 |
commit | 44937d98a30c5ad8b6fdcdfece07228b159fbf24 (patch) | |
tree | 9cfb09eadc8d22e548f35bb74445e887c1a2488f | |
parent | a56749064a34b8416e7ddbf01fb930488d7f22d6 (diff) | |
download | bcm5719-llvm-44937d98a30c5ad8b6fdcdfece07228b159fbf24.tar.gz bcm5719-llvm-44937d98a30c5ad8b6fdcdfece07228b159fbf24.zip |
Lower thumbv4t & thumbv5 lo->lo copies through a push-pop sequence
On pre-v6 hardware, 'MOV lo, lo' gives undefined results, so such copies need to
be avoided. This patch opts for a simple, quick-to-implement fix at the expense
of performance... As they say: correctness first, then performance.
See http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075998.html for a few
ideas on how to make this better.
llvm-svn: 216138
-rw-r--r-- | llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 25 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/atomic-cmpxchg.ll | 7 | ||||
-rw-r--r-- | llvm/test/CodeGen/Thumb/copy_thumb.ll | 38 |
3 files changed, 66 insertions, 4 deletions
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index abde25081b0..8ea912e2703 100644 --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "ARMSubtarget.h" #include "Thumb1InstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -41,10 +42,30 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc))); + // Need to check the arch. + MachineFunction &MF = *MBB.getParent(); + const ARMSubtarget &st = MF.getTarget().getSubtarget<ARMSubtarget>(); + assert(ARM::GPRRegClass.contains(DestReg, SrcReg) && "Thumb1 can only copy GPR registers"); + + if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) + || !ARM::tGPRRegClass.contains(DestReg)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc))); + else { + // FIXME: The performance consequences of this are going to be atrocious. 
+ // Some things to try that should be better: + // * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11 + // * 'movs $dst, $src' if cpsr isn't live + // See: http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075998.html + + // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH))) + .addReg(SrcReg, getKillRegState(KillSrc)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPOP))) + .addReg(DestReg, getDefRegState(true)); + } } void Thumb1InstrInfo:: diff --git a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll index 4b79fa25145..84790be6d60 100644 --- a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll @@ -20,12 +20,15 @@ entry: ; CHECK-THUMB-LABEL: test_cmpxchg_res_i8 ; CHECK-THUMB: bl __sync_val_compare_and_swap_1 -; CHECK-THUMB: mov [[R1:r[0-9]+]], r0 +; CHECK-THUMB-NOT: mov [[R1:r[0-7]]], r0 +; CHECK-THUMB: push {r0} +; CHECK-THUMB: pop {[[R1:r[0-7]]]} ; CHECK-THUMB: movs r0, #1 ; CHECK-THUMB: movs [[R2:r[0-9]+]], #0 ; CHECK-THUMB: cmp [[R1]], {{r[0-9]+}} ; CHECK-THU<B: beq -; CHECK-THUMB: mov r0, [[R2]] +; CHECK-THUMB: push {[[R2]]} +; CHECK-THUMB: pop {r0} ; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8 ; CHECK-ARMV7: ldrexb [[R3:r[0-9]+]], [r0] diff --git a/llvm/test/CodeGen/Thumb/copy_thumb.ll b/llvm/test/CodeGen/Thumb/copy_thumb.ll new file mode 100644 index 00000000000..528f54bd84e --- /dev/null +++ b/llvm/test/CodeGen/Thumb/copy_thumb.ll @@ -0,0 +1,38 @@ +; RUN: llc -mtriple=armv4-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV +; RUN: llc -mtriple=armv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV +; RUN: llc -mtriple=armv5-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV +; RUN: llc -mtriple=armv6-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV +; RUN: llc -mtriple=armv7-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV +; RUN: llc 
-mtriple=thumbv6-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV +; RUN: llc -mtriple=thumbv7-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV +; CHECK-LOLOMOV-LABEL: foo +; CHECK-LOLOMOV: mov [[TMP:r[0-7]]], [[SRC1:r[01]]] +; CHECK-LOLOMOV-NEXT: mov [[SRC1]], [[SRC2:r[01]]] +; CHECK-LOLOMOV-NEXT: mov [[SRC2]], [[TMP]] +; CHECK-LOLOMOV-LABEL: bar +; CHECK-LOLOMOV-LABEL: fnend +; +; 'MOV lo, lo' in Thumb mode produces undefined results on pre-v6 hardware +; RUN: llc -mtriple=thumbv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK-NOLOLOMOV +; RUN: llc -mtriple=thumbv5-none--eabi < %s | FileCheck %s --check-prefix=CHECK-NOLOLOMOV +; CHECK-NOLOLOMOV-LABEL: foo +; CHECK-NOLOLOMOV-NOT: mov [[TMP:r[0-7]]], [[SRC1:r[01]]] +; CHECK-NOLOLOMOV: push {[[SRC1:r[01]]]} +; CHECK-NOLOLOMOV-NEXT: pop {[[TMP:r[0-7]]]} +; CHECK-NOLOLOMOV-NOT: mov [[TMP:r[0-7]]], [[SRC1:r[01]]] +; CHECK-NOLOLOMOV: push {[[SRC2:r[01]]]} +; CHECK-NOLOLOMOV-NEXT: pop {[[SRC1]]} +; CHECK-NOLOLOMOV-NOT: mov [[TMP:r[0-7]]], [[SRC1:r[01]]] +; CHECK-NOLOLOMOV: push {[[TMP]]} +; CHECK-NOLOLOMOV-NEXT: pop {[[SRC2]]} +; CHECK-NOLOLOMOV-LABEL: bar +; CHECK-NOLOLOMOV-LABEL: fnend + +declare void @bar(i32, i32) + +define void @foo(i32 %a, i32 %b) { +entry: + call void @bar(i32 %b, i32 %a); + ret void +} + |