summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86InstrInfo.cpp
diff options
context:
space:
mode:
author Bill Wendling <isanbard@gmail.com> 2010-03-05 00:24:26 +0000
committer Bill Wendling <isanbard@gmail.com> 2010-03-05 00:24:26 +0000
commit 6517f88f25f0687e563526c0b934d2bcf9211130 (patch)
tree 07a8baccc6fecc46286f4e187d0510bfdf0c4c76 /llvm/lib/Target/X86/X86InstrInfo.cpp
parent 2061c84109fb36f34745a700bc5283e13b51d736 (diff)
download bcm5719-llvm-6517f88f25f0687e563526c0b934d2bcf9211130.tar.gz
download bcm5719-llvm-6517f88f25f0687e563526c0b934d2bcf9211130.zip
Micro-optimization:
This code:

    float floatingPointComparison(float x, float y) {
        double product = (double)x * y;
        if (product == 0.0)
            return product;
        return product - 1.0;
    }

produces this:

    _floatingPointComparison:
    0000000000000000  cvtss2sd  %xmm1,%xmm1
    0000000000000004  cvtss2sd  %xmm0,%xmm0
    0000000000000008  mulsd     %xmm1,%xmm0
    000000000000000c  pxor      %xmm1,%xmm1
    0000000000000010  ucomisd   %xmm1,%xmm0
    0000000000000014  jne       0x00000004
    0000000000000016  jp        0x00000002
    0000000000000018  jmp       0x00000008
    000000000000001a  addsd     0x00000006(%rip),%xmm0
    0000000000000022  cvtsd2ss  %xmm0,%xmm0
    0000000000000026  ret

The "jne/jp/jmp" sequence can be reduced to this instead:

    _floatingPointComparison:
    0000000000000000  cvtss2sd  %xmm1,%xmm1
    0000000000000004  cvtss2sd  %xmm0,%xmm0
    0000000000000008  mulsd     %xmm1,%xmm0
    000000000000000c  pxor      %xmm1,%xmm1
    0000000000000010  ucomisd   %xmm1,%xmm0
    0000000000000014  jp        0x00000002
    0000000000000016  je        0x00000008
    0000000000000018  addsd     0x00000006(%rip),%xmm0
    0000000000000020  cvtsd2ss  %xmm0,%xmm0
    0000000000000024  ret

for a savings of 2 bytes.

This xform can happen when we recognize that jne and jp jump to the same "true" MBB, the unconditional jump would jump to the "false" MBB, and the "true" branch is the fall-through MBB.

llvm-svn: 97766
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.cpp 65
1 files changed, 52 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 39bda04b4d1..0d3b54fe8e2 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1786,6 +1786,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
const SmallVectorImpl<MachineOperand> &Cond) const {
// FIXME this should probably have a DebugLoc operand
DebugLoc dl = DebugLoc::getUnknownLoc();
+
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
@@ -1799,34 +1800,72 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
}
// Conditional branch.
+ const MachineBasicBlock *NextBB = next(&MBB);
unsigned Count = 0;
X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
+
+ // In a two-way conditional branch, if the fall-through block is the
+ // "false" branch of the conditional jumps, we can cut out the
+ // unconditional jump by rearranging the conditional jumps. This saves a
+ // few bytes and improves performance. I.e., for COND_NE_OR_P:
+ //
+ // JNE L1
+ // JP L1
+ // JMP L2
+ // L1:
+ // ...
+ // L2:
+ // ...
+ //
+ // to:
+ //
+ // JP L1
+ // JE L2
+ // L1:
+ // ...
+ // L2:
+ // ...
+ //
+ // Similarly for COND_NP_OR_E.
switch (CC) {
+ default:
+ BuildMI(&MBB, dl, get(GetCondBranchFromCond(CC))).addMBB(TBB);
+ ++Count;
+ break;
case X86::COND_NP_OR_E:
// Synthesize NP_OR_E with two branches.
- BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
- ++Count;
- BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB);
- ++Count;
+ if (FBB && FBB == NextBB) {
+ BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
+ BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(FBB);
+ FBB = 0;
+ } else {
+ BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
+ BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB);
+ }
+
+ Count += 2;
break;
case X86::COND_NE_OR_P:
// Synthesize NE_OR_P with two branches.
- BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB);
- ++Count;
- BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
- ++Count;
+ if (FBB && FBB == NextBB) {
+ BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
+ BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(FBB);
+ FBB = 0;
+ } else {
+ BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB);
+ BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
+ }
+
+ Count += 2;
break;
- default: {
- unsigned Opc = GetCondBranchFromCond(CC);
- BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
- ++Count;
- }
}
+
if (FBB) {
// Two-way Conditional branch. Insert the second branch.
BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB);
++Count;
}
+
return Count;
}
OpenPOWER on IntegriCloud