summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorAhmed Bougacha <ahmed.bougacha@gmail.com>2015-03-03 01:21:16 +0000
committerAhmed Bougacha <ahmed.bougacha@gmail.com>2015-03-03 01:21:16 +0000
commitafbd6887c4a70a6bbaa1d7c423a5f78e07d8bce4 (patch)
tree575d8944cc6c0a0caab38819c6fdfe026cf5579b /llvm/lib
parent066d0b8e646ef91c787dc4deea98433cceab7793 (diff)
downloadbcm5719-llvm-afbd6887c4a70a6bbaa1d7c423a5f78e07d8bce4.tar.gz
bcm5719-llvm-afbd6887c4a70a6bbaa1d7c423a5f78e07d8bce4.zip
[X86] Special-case 2x CMOV when custom-inserting.
This lets us avoid a few copies that are otherwise hard to get rid of. The way this is done is, the custom-inserter looks at the following instruction for another CMOV, and replaces both at the same time. A previous version used a new CMOV2 opcode, but the custom inserter is expected to be able to return a different basic block anyway, which means it's OK - though far from ideal - to alter that block's contents. Explicitly document that, in case it ever makes a difference. Alternatives welcome! Follow-up to r231045. rdar://19767934 Closes http://reviews.llvm.org/D8019 llvm-svn: 231046
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp132
1 files changed, 125 insertions, 7 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a8e166c5891..ec0f0033a40 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18108,6 +18108,92 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// fallthrough --> copy0MBB
MachineBasicBlock *thisMBB = BB;
MachineFunction *F = BB->getParent();
+
+ // We also lower double CMOVs:
+ // (CMOV (CMOV F, T, cc1), T, cc2)
+ // to two successives branches. For that, we look for another CMOV as the
+ // following instruction.
+ //
+ // Without this, we would add a PHI between the two jumps, which ends up
+ // creating a few copies all around. For instance, for
+ //
+ // (sitofp (zext (fcmp une)))
+ //
+ // we would generate:
+ //
+ // ucomiss %xmm1, %xmm0
+ // movss <1.0f>, %xmm0
+ // movaps %xmm0, %xmm1
+ // jne .LBB5_2
+ // xorps %xmm1, %xmm1
+ // .LBB5_2:
+ // jp .LBB5_4
+ // movaps %xmm1, %xmm0
+ // .LBB5_4:
+ // retq
+ //
+ // because this custom-inserter would have generated:
+ //
+ // A
+ // | \
+ // | B
+ // | /
+ // C
+ // | \
+ // | D
+ // | /
+ // E
+ //
+ // A: X = ...; Y = ...
+ // B: empty
+ // C: Z = PHI [X, A], [Y, B]
+ // D: empty
+ // E: PHI [X, C], [Z, D]
+ //
+ // If we lower both CMOVs in a single step, we can instead generate:
+ //
+ // A
+ // | \
+ // | C
+ // | /|
+ // |/ |
+ // | |
+ // | D
+ // | /
+ // E
+ //
+ // A: X = ...; Y = ...
+ // D: empty
+ // E: PHI [X, A], [X, C], [Y, D]
+ //
+ // Which, in our sitofp/fcmp example, gives us something like:
+ //
+ // ucomiss %xmm1, %xmm0
+ // movss <1.0f>, %xmm0
+ // jne .LBB5_4
+ // jp .LBB5_4
+ // xorps %xmm0, %xmm0
+ // .LBB5_4:
+ // retq
+ //
+ MachineInstr *NextCMOV = nullptr;
+ MachineBasicBlock::iterator NextMIIt =
+ std::next(MachineBasicBlock::iterator(MI));
+ if (NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() &&
+ NextMIIt->getOperand(2).getReg() == MI->getOperand(2).getReg() &&
+ NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg())
+ NextCMOV = &*NextMIIt;
+
+ MachineBasicBlock *jcc1MBB = nullptr;
+
+ // If we have a double CMOV, we lower it to two successive branches to
+ // the same block. EFLAGS is used by both, so mark it as live in the second.
+ if (NextCMOV) {
+ jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, jcc1MBB);
+ jcc1MBB->addLiveIn(X86::EFLAGS);
+ }
+
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, copy0MBB);
@@ -18116,8 +18202,10 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
- if (!MI->killsRegister(X86::EFLAGS) &&
- !checkAndUpdateEFLAGSKill(MI, BB, TRI)) {
+
+ MachineInstr *LastEFLAGSUser = NextCMOV ? NextCMOV : MI;
+ if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) &&
+ !checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
sinkMBB->addLiveIn(X86::EFLAGS);
}
@@ -18128,7 +18216,19 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
+ if (NextCMOV) {
+ // The fallthrough block may be jcc1MBB, if we have a double CMOV.
+ BB->addSuccessor(jcc1MBB);
+
+ // In that case, jcc1MBB will itself fallthrough the copy0MBB, and
+ // jump to the sinkMBB.
+ jcc1MBB->addSuccessor(copy0MBB);
+ jcc1MBB->addSuccessor(sinkMBB);
+ } else {
+ BB->addSuccessor(copy0MBB);
+ }
+
+ // The true block target of the first (or only) branch is always sinkMBB.
BB->addSuccessor(sinkMBB);
// Create the conditional branch instruction.
@@ -18136,6 +18236,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
+ if (NextCMOV) {
+ unsigned Opc2 = X86::GetCondBranchFromCond(
+ (X86::CondCode)NextCMOV->getOperand(3).getImm());
+ BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB);
+ }
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -18144,10 +18250,22 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(X86::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ MachineInstrBuilder MIB =
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ // If we have a double CMOV, the second Jcc provides the same incoming
+ // value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes).
+ if (NextCMOV) {
+ MIB.addReg(MI->getOperand(2).getReg()).addMBB(jcc1MBB);
+ // Copy the PHI result to the register defined by the second CMOV.
+ BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())),
+ DL, TII->get(TargetOpcode::COPY), NextCMOV->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg());
+ NextCMOV->eraseFromParent();
+ }
MI->eraseFromParent(); // The pseudo instruction is gone now.
return sinkMBB;
OpenPOWER on IntegriCloud