summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorChuang-Yu Cheng <cycheng@multicorewareinc.com>2016-04-12 03:04:44 +0000
committerChuang-Yu Cheng <cycheng@multicorewareinc.com>2016-04-12 03:04:44 +0000
commit6efde2fb451bf6c5538ca38cfb44cf84a7bb0e72 (patch)
treea1c02d803a815144b37a07df7672365b45ada2ce /llvm
parent9b3f99e50f5aff081352f362c1923e2dd3ed3aaf (diff)
downloadbcm5719-llvm-6efde2fb451bf6c5538ca38cfb44cf84a7bb0e72.tar.gz
bcm5719-llvm-6efde2fb451bf6c5538ca38cfb44cf84a7bb0e72.zip
[PPC64] Use mfocrf in prologue when we only need to save 1 nonvolatile CR field
In the ELFv2 ABI, we are not required to save all CR fields. If only one nonvolatile CR field is clobbered, use mfocrf instead of mfcr to selectively save the field, because mfocrf has shorter latency than mfcr. Thanks to Nemanja for the invaluable hint! Reviewers: nemanjai tjablin hfinkel kbarton http://reviews.llvm.org/D17749 llvm-svn: 266038
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/PowerPC/PPCFrameLowering.cpp24
-rw-r--r--llvm/test/CodeGen/PowerPC/crsave.ll20
-rw-r--r--llvm/test/CodeGen/PowerPC/pr26690.ll2
3 files changed, 37 insertions, 9 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index aadf64d0bbe..40a2e8bbe23 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -838,11 +838,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// If we need to spill the CR and the LR but we don't have two separate
// registers available, we must spill them one at a time
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
- // FIXME: In the ELFv2 ABI, we are not required to save all CR fields.
- // If only one or two CR fields are clobbered, it could be more
- // efficient to use mfocrf to selectively save just those fields.
+ // In the ELFv2 ABI, we are not required to save all CR fields.
+ // If only one or two CR fields are clobbered, it is more efficient to use
+ // mfocrf to selectively save just those fields, because mfocrf has shorter
+ // latency than mfcr.
+ unsigned MfcrOpcode = PPC::MFCR8;
+ if (isELFv2ABI && MustSaveCRs.size() == 1)
+ MfcrOpcode = PPC::MFOCRF8;
MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg);
+ BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
@@ -856,11 +860,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
if (MustSaveCR &&
!(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
- // FIXME: In the ELFv2 ABI, we are not required to save all CR fields.
- // If only one or two CR fields are clobbered, it could be more
- // efficient to use mfocrf to selectively save just those fields.
+ // In the ELFv2 ABI, we are not required to save all CR fields.
+ // If only one or two CR fields are clobbered, it is more efficient to use
+ // mfocrf to selectively save just those fields, because mfocrf has shorter
+ // latency than mfcr.
+ unsigned MfcrOpcode = PPC::MFCR8;
+ if (isELFv2ABI && MustSaveCRs.size() == 1)
+ MfcrOpcode = PPC::MFOCRF8;
MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg);
+ BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
}
diff --git a/llvm/test/CodeGen/PowerPC/crsave.ll b/llvm/test/CodeGen/PowerPC/crsave.ll
index 8121e1b6e63..4975afcce75 100644
--- a/llvm/test/CodeGen/PowerPC/crsave.ll
+++ b/llvm/test/CodeGen/PowerPC/crsave.ll
@@ -1,5 +1,6 @@
; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc-unknown-linux-gnu -mcpu=g5 < %s | FileCheck %s -check-prefix=PPC32
; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 < %s | FileCheck %s -check-prefix=PPC64
+; RUN: llc -O0 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC64-ELFv2
declare void @foo()
@@ -60,3 +61,22 @@ entry:
; PPC64: mtocrf 16, 12
; PPC64: mtocrf 8, 12
+; Generate mfocrf in prologue when we need to save 1 nonvolatile CR field
+define void @cloberOneNvCrField() {
+entry:
+ tail call void asm sideeffect "# clobbers", "~{cr2}"()
+ ret void
+
+; PPC64-ELFv2-LABEL: @cloberOneNvCrField
+; PPC64-ELFv2: mfocrf [[REG1:[0-9]+]], 32
+}
+
+; Generate mfcr in prologue when we need to save all nonvolatile CR fields
+define void @cloberAllNvCrField() {
+entry:
+ tail call void asm sideeffect "# clobbers", "~{cr2},~{cr3},~{cr4}"()
+ ret void
+
+; PPC64-ELFv2-LABEL: @cloberAllNvCrField
+; PPC64-ELFv2: mfcr [[REG1:[0-9]+]]
+}
diff --git a/llvm/test/CodeGen/PowerPC/pr26690.ll b/llvm/test/CodeGen/PowerPC/pr26690.ll
index 524e8b524bf..e1c3c496ed4 100644
--- a/llvm/test/CodeGen/PowerPC/pr26690.ll
+++ b/llvm/test/CodeGen/PowerPC/pr26690.ll
@@ -101,7 +101,7 @@ if.end16: ; preds = %entry, %if.end13, %
ret i32 2
}
-; CHECK: mfcr {{[0-9]+}}
+; CHECK: mfocrf {{[0-9]+}}
!llvm.ident = !{!0}
OpenPOWER on IntegriCloud