diff options
| author | Chandler Carruth <chandlerc@gmail.com> | 2018-05-15 20:16:57 +0000 |
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2018-05-15 20:16:57 +0000 |
| commit | 5ecd81aab0ddc74abb4ccef86e1aa8dcead3f5c2 (patch) | |
| tree | 278c7e23b09b965a49b2791dd9c4bb0aa171990c /llvm/lib/Target | |
| parent | ec8598efb17216c0f4c93040ac1b40f97ae01fc7 (diff) | |
| download | bcm5719-llvm-5ecd81aab0ddc74abb4ccef86e1aa8dcead3f5c2.tar.gz bcm5719-llvm-5ecd81aab0ddc74abb4ccef86e1aa8dcead3f5c2.zip | |
[x86][eflags] Fix PR37431 by teaching the EFLAGS copy lowering to
specially handle SETB_C* pseudo instructions.
Summary:
While the logic here is somewhat similar to the arithmetic lowering, it
is different enough that it made sense to have its own function.
I actually tried a bunch of different optimizations here and none worked
well so I gave up and just always do the arithmetic based lowering.
Looking at code from the PR test case, we actually pessimize a bunch of
code when generating these. Because SETB_C* pseudo instructions clobber
EFLAGS, we end up creating a bunch of copies of EFLAGS to feed multiple
SETB_C* pseudos from a single set of EFLAGS. This in turn causes the
lowering code to ruin all the clever code generation that SETB_C* was
hoping to achieve. None of this is needed. Whenever we're generating
multiple SETB_C* instructions from a single set of EFLAGS we should
instead generate a single maximally wide one and extract subregs for all
the different desired widths. That would result in substantially better
code generation. But this patch doesn't attempt to address that.
The test case from the PR is included as well as more directed testing
of the specific lowering pattern used for these pseudos.
Reviewers: craig.topper
Subscribers: sanjoy, mcrosier, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D46799
llvm-svn: 332389
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86FlagsCopyLowering.cpp | 145 |
1 files changed, 142 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp index c18739254b0..bc504824446 100644 --- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -127,6 +127,10 @@ private: MachineInstr &JmpI, CondRegArray &CondRegs); void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse, MachineInstr &CopyDefI); + void rewriteSetCarryExtended(MachineBasicBlock &TestMBB, + MachineBasicBlock::iterator TestPos, + DebugLoc TestLoc, MachineInstr &SetBI, + MachineOperand &FlagUse, CondRegArray &CondRegs); void rewriteSetCC(MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, MachineInstr &SetCCI, MachineOperand &FlagUse, @@ -512,8 +516,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { } else if (MI.getOpcode() == TargetOpcode::COPY) { rewriteCopy(MI, *FlagUse, CopyDefI); } else { - // We assume that arithmetic instructions that use flags also def - // them. + // We assume all other instructions that use flags also def them. assert(MI.findRegisterDefOperand(X86::EFLAGS) && "Expected a def of EFLAGS for this instruction!"); @@ -525,7 +528,23 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { // logic. FlagsKilled = true; - rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); + switch (MI.getOpcode()) { + case X86::SETB_C8r: + case X86::SETB_C16r: + case X86::SETB_C32r: + case X86::SETB_C64r: + // Use custom lowering for arithmetic that is merely extending the + // carry flag. We model this as the SETB_C* pseudo instructions. + rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse, + CondRegs); + break; + + default: + // Generically handle remaining uses as arithmetic instructions. + rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, + CondRegs); + break; + } break; } @@ -753,6 +772,126 @@ void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI, MI.eraseFromParent(); } +void X86FlagsCopyLoweringPass::rewriteSetCarryExtended( + MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, + DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse, + CondRegArray &CondRegs) { + // This routine is only used to handle pseudos for setting a register to zero + // or all ones based on CF. This is essentially the sign extended from 1-bit + // form of SETB and modeled with the SETB_C* pseudos. They require special + // handling as they aren't normal SETcc instructions and are lowered to an + // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that + // they are only provided in reg-defining forms. A complicating factor is that + // they can define many different register widths. + assert(SetBI.getOperand(0).isReg() && + "Cannot have a non-register defined operand to this variant of SETB!"); + + // Little helper to do the common final step of replacing the register def'ed + // by this SETB instruction with a new register and removing the SETB + // instruction. + auto RewriteToReg = [&](unsigned Reg) { + MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg); + SetBI.eraseFromParent(); + }; + + // Grab the register class used for this particular instruction. + auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg()); + + MachineBasicBlock &MBB = *SetBI.getParent(); + auto SetPos = SetBI.getIterator(); + auto SetLoc = SetBI.getDebugLoc(); + + auto AdjustReg = [&](unsigned Reg) { + auto &OrigRC = *MRI->getRegClass(Reg); + if (&OrigRC == &SetBRC) + return Reg; + + unsigned NewReg; + + int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8; + int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8; + assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!"); + assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!"); + int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit, + X86::NoSubRegister, X86::sub_32bit}; + + // If the original size is smaller than the target *and* is smaller than 4 + // bytes, we need to explicitly zero extend it. We always extend to 4-bytes + // to maximize the chance of being able to CSE that operation and to avoid + // partial dependency stalls extending to 2-bytes. + if (OrigRegSize < TargetRegSize && OrigRegSize < 4) { + NewReg = MRI->createVirtualRegister(&X86::GR32RegClass); + BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg) + .addReg(Reg); + if (&SetBRC == &X86::GR32RegClass) + return NewReg; + Reg = NewReg; + OrigRegSize = 4; + } + + NewReg = MRI->createVirtualRegister(&SetBRC); + if (OrigRegSize < TargetRegSize) { + BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG), + NewReg) + .addImm(0) + .addReg(Reg) + .addImm(SubRegIdx[OrigRegSize]); + } else if (OrigRegSize > TargetRegSize) { + BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG), + NewReg) + .addReg(Reg) + .addImm(SubRegIdx[TargetRegSize]); + } else { + BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg) + .addReg(Reg); + } + return NewReg; + }; + + unsigned &CondReg = CondRegs[X86::COND_B]; + if (!CondReg) + CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B); + + // Adjust the condition to have the desired register width by zero-extending + // as needed. + // FIXME: We should use a better API to avoid the local reference and using a + // different variable here. + unsigned ExtCondReg = AdjustReg(CondReg); + + // Now we need to turn this into a bitmask. We do this by subtracting it from + // zero. + unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass); + BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg); + ZeroReg = AdjustReg(ZeroReg); + + unsigned Sub; + switch (SetBI.getOpcode()) { + case X86::SETB_C8r: + Sub = X86::SUB8rr; + break; + + case X86::SETB_C16r: + Sub = X86::SUB16rr; + break; + + case X86::SETB_C32r: + Sub = X86::SUB32rr; + break; + + case X86::SETB_C64r: + Sub = X86::SUB64rr; + break; + + default: + llvm_unreachable("Invalid SETB_C* opcode!"); + } + unsigned ResultReg = MRI->createVirtualRegister(&SetBRC); + BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg) + .addReg(ZeroReg) + .addReg(ExtCondReg); + return RewriteToReg(ResultReg); +} + void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, |

