diff options
| author | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2015-03-03 01:21:16 +0000 |
|---|---|---|
| committer | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2015-03-03 01:21:16 +0000 |
| commit | afbd6887c4a70a6bbaa1d7c423a5f78e07d8bce4 (patch) | |
| tree | 575d8944cc6c0a0caab38819c6fdfe026cf5579b /llvm/lib | |
| parent | 066d0b8e646ef91c787dc4deea98433cceab7793 (diff) | |
| download | bcm5719-llvm-afbd6887c4a70a6bbaa1d7c423a5f78e07d8bce4.tar.gz bcm5719-llvm-afbd6887c4a70a6bbaa1d7c423a5f78e07d8bce4.zip | |
[X86] Special-case 2x CMOV when custom-inserting.
This lets us avoid a few copies that are otherwise hard to get rid of.
The way this is done is, the custom-inserter looks at the following
instruction for another CMOV, and replaces both at the same time.
A previous version used a new CMOV2 opcode, but the custom inserter
is expected to be able to return a different basic block anyway, which
means it's OK - though far from ideal - to alter that block's contents.
Explicitly document that, in case it ever makes a difference.
Alternatives welcome!
Follow-up to r231045.
rdar://19767934
Closes http://reviews.llvm.org/D8019
llvm-svn: 231046
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 132 |
1 files changed, 125 insertions, 7 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a8e166c5891..ec0f0033a40 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18108,6 +18108,92 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineFunction *F = BB->getParent(); + + // We also lower double CMOVs: + // (CMOV (CMOV F, T, cc1), T, cc2) + // to two successives branches. For that, we look for another CMOV as the + // following instruction. + // + // Without this, we would add a PHI between the two jumps, which ends up + // creating a few copies all around. For instance, for + // + // (sitofp (zext (fcmp une))) + // + // we would generate: + // + // ucomiss %xmm1, %xmm0 + // movss <1.0f>, %xmm0 + // movaps %xmm0, %xmm1 + // jne .LBB5_2 + // xorps %xmm1, %xmm1 + // .LBB5_2: + // jp .LBB5_4 + // movaps %xmm1, %xmm0 + // .LBB5_4: + // retq + // + // because this custom-inserter would have generated: + // + // A + // | \ + // | B + // | / + // C + // | \ + // | D + // | / + // E + // + // A: X = ...; Y = ... + // B: empty + // C: Z = PHI [X, A], [Y, B] + // D: empty + // E: PHI [X, C], [Z, D] + // + // If we lower both CMOVs in a single step, we can instead generate: + // + // A + // | \ + // | C + // | /| + // |/ | + // | | + // | D + // | / + // E + // + // A: X = ...; Y = ... + // D: empty + // E: PHI [X, A], [X, C], [Y, D] + // + // Which, in our sitofp/fcmp example, gives us something like: + // + // ucomiss %xmm1, %xmm0 + // movss <1.0f>, %xmm0 + // jne .LBB5_4 + // jp .LBB5_4 + // xorps %xmm0, %xmm0 + // .LBB5_4: + // retq + // + MachineInstr *NextCMOV = nullptr; + MachineBasicBlock::iterator NextMIIt = + std::next(MachineBasicBlock::iterator(MI)); + if (NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() && + NextMIIt->getOperand(2).getReg() == MI->getOperand(2).getReg() && + NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg()) + NextCMOV = &*NextMIIt; + + MachineBasicBlock *jcc1MBB = nullptr; + + // If we have a double CMOV, we lower it to two successive branches to + // the same block. EFLAGS is used by both, so mark it as live in the second. + if (NextCMOV) { + jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, jcc1MBB); + jcc1MBB->addLiveIn(X86::EFLAGS); + } + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, copy0MBB); @@ -18116,8 +18202,10 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // If the EFLAGS register isn't dead in the terminator, then claim that it's // live into the sink and copy blocks. const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); - if (!MI->killsRegister(X86::EFLAGS) && - !checkAndUpdateEFLAGSKill(MI, BB, TRI)) { + + MachineInstr *LastEFLAGSUser = NextCMOV ? NextCMOV : MI; + if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) && + !checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) { copy0MBB->addLiveIn(X86::EFLAGS); sinkMBB->addLiveIn(X86::EFLAGS); } @@ -18128,7 +18216,19 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); + if (NextCMOV) { + // The fallthrough block may be jcc1MBB, if we have a double CMOV. + BB->addSuccessor(jcc1MBB); + + // In that case, jcc1MBB will itself fallthrough the copy0MBB, and + // jump to the sinkMBB. + jcc1MBB->addSuccessor(copy0MBB); + jcc1MBB->addSuccessor(sinkMBB); + } else { + BB->addSuccessor(copy0MBB); + } + + // The true block target of the first (or only) branch is always sinkMBB. BB->addSuccessor(sinkMBB); // Create the conditional branch instruction. @@ -18136,6 +18236,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); + if (NextCMOV) { + unsigned Opc2 = X86::GetCondBranchFromCond( + (X86::CondCode)NextCMOV->getOperand(3).getImm()); + BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB); + } + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -18144,10 +18250,22 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... - BuildMI(*sinkMBB, sinkMBB->begin(), DL, - TII->get(X86::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) - .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + MachineInstrBuilder MIB = + BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), + MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + + // If we have a double CMOV, the second Jcc provides the same incoming + // value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes). + if (NextCMOV) { + MIB.addReg(MI->getOperand(2).getReg()).addMBB(jcc1MBB); + // Copy the PHI result to the register defined by the second CMOV. + BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())), + DL, TII->get(TargetOpcode::COPY), NextCMOV->getOperand(0).getReg()) + .addReg(MI->getOperand(0).getReg()); + NextCMOV->eraseFromParent(); + } MI->eraseFromParent(); // The pseudo instruction is gone now. return sinkMBB; |

