summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2018-05-15 20:16:57 +0000
committerChandler Carruth <chandlerc@gmail.com>2018-05-15 20:16:57 +0000
commit5ecd81aab0ddc74abb4ccef86e1aa8dcead3f5c2 (patch)
tree278c7e23b09b965a49b2791dd9c4bb0aa171990c /llvm/lib/Target
parentec8598efb17216c0f4c93040ac1b40f97ae01fc7 (diff)
downloadbcm5719-llvm-5ecd81aab0ddc74abb4ccef86e1aa8dcead3f5c2.tar.gz
bcm5719-llvm-5ecd81aab0ddc74abb4ccef86e1aa8dcead3f5c2.zip
[x86][eflags] Fix PR37431 by teaching the EFLAGS copy lowering to
specially handle SETB_C* pseudo instructions. Summary: While the logic here is somewhat similar to the arithmetic lowering, it is different enough that it made sense to have its own function. I actually tried a bunch of different optimizations here and none worked well so I gave up and just always do the arithmetic based lowering. Looking at code from the PR test case, we actually pessimize a bunch of code when generating these. Because SETB_C* pseudo instructions clobber EFLAGS, we end up creating a bunch of copies of EFLAGS to feed multiple SETB_C* pseudos from a single set of EFLAGS. This in turn causes the lowering code to ruin all the clever code generation that SETB_C* was hoping to achieve. None of this is needed. Whenever we're generating multiple SETB_C* instructions from a single set of EFLAGS we should instead generate a single maximally wide one and extract subregs for all the different desired widths. That would result in substantially better code generation. But this patch doesn't attempt to address that. The test case from the PR is included as well as more directed testing of the specific lowering pattern used for these pseudos. Reviewers: craig.topper Subscribers: sanjoy, mcrosier, llvm-commits, hiraditya Differential Revision: https://reviews.llvm.org/D46799 llvm-svn: 332389
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86FlagsCopyLowering.cpp145
1 files changed, 142 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index c18739254b0..bc504824446 100644
--- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -127,6 +127,10 @@ private:
MachineInstr &JmpI, CondRegArray &CondRegs);
void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
MachineInstr &CopyDefI);
+ void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
+ MachineBasicBlock::iterator TestPos,
+ DebugLoc TestLoc, MachineInstr &SetBI,
+ MachineOperand &FlagUse, CondRegArray &CondRegs);
void rewriteSetCC(MachineBasicBlock &TestMBB,
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
MachineInstr &SetCCI, MachineOperand &FlagUse,
@@ -512,8 +516,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
} else if (MI.getOpcode() == TargetOpcode::COPY) {
rewriteCopy(MI, *FlagUse, CopyDefI);
} else {
- // We assume that arithmetic instructions that use flags also def
- // them.
+ // We assume all other instructions that use flags also def them.
assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
"Expected a def of EFLAGS for this instruction!");
@@ -525,7 +528,23 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
// logic.
FlagsKilled = true;
- rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+ switch (MI.getOpcode()) {
+ case X86::SETB_C8r:
+ case X86::SETB_C16r:
+ case X86::SETB_C32r:
+ case X86::SETB_C64r:
+ // Use custom lowering for arithmetic that is merely extending the
+ // carry flag. We model this as the SETB_C* pseudo instructions.
+ rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse,
+ CondRegs);
+ break;
+
+ default:
+ // Generically handle remaining uses as arithmetic instructions.
+ rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse,
+ CondRegs);
+ break;
+ }
break;
}
@@ -753,6 +772,126 @@ void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
MI.eraseFromParent();
}
+void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
+ MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
+ DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
+ CondRegArray &CondRegs) {
+ // This routine is only used to handle pseudos for setting a register to zero
+ // or all ones based on CF. This is essentially the sign extended from 1-bit
+ // form of SETB and modeled with the SETB_C* pseudos. They require special
+ // handling as they aren't normal SETcc instructions and are lowered to an
+ // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
+ // they are only provided in reg-defining forms. A complicating factor is that
+ // they can define many different register widths.
+ assert(SetBI.getOperand(0).isReg() &&
+ "Cannot have a non-register defined operand to this variant of SETB!");
+
+ // Little helper to do the common final step of replacing the register def'ed
+ // by this SETB instruction with a new register and removing the SETB
+ // instruction.
+ auto RewriteToReg = [&](unsigned Reg) {
+ MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
+ SetBI.eraseFromParent();
+ };
+
+ // Grab the register class used for this particular instruction.
+ auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
+
+ MachineBasicBlock &MBB = *SetBI.getParent();
+ auto SetPos = SetBI.getIterator();
+ auto SetLoc = SetBI.getDebugLoc();
+
+ auto AdjustReg = [&](unsigned Reg) {
+ auto &OrigRC = *MRI->getRegClass(Reg);
+ if (&OrigRC == &SetBRC)
+ return Reg;
+
+ unsigned NewReg;
+
+ int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
+ int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
+ assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
+ assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
+ int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
+ X86::NoSubRegister, X86::sub_32bit};
+
+ // If the original size is smaller than the target *and* is smaller than 4
+ // bytes, we need to explicitly zero extend it. We always extend to 4-bytes
+ // to maximize the chance of being able to CSE that operation and to avoid
+ // partial dependency stalls extending to 2-bytes.
+ if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
+ NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
+ BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
+ .addReg(Reg);
+ if (&SetBRC == &X86::GR32RegClass)
+ return NewReg;
+ Reg = NewReg;
+ OrigRegSize = 4;
+ }
+
+ NewReg = MRI->createVirtualRegister(&SetBRC);
+ if (OrigRegSize < TargetRegSize) {
+ BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
+ NewReg)
+ .addImm(0)
+ .addReg(Reg)
+ .addImm(SubRegIdx[OrigRegSize]);
+ } else if (OrigRegSize > TargetRegSize) {
+ BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG),
+ NewReg)
+ .addReg(Reg)
+ .addImm(SubRegIdx[TargetRegSize]);
+ } else {
+ BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(Reg);
+ }
+ return NewReg;
+ };
+
+ unsigned &CondReg = CondRegs[X86::COND_B];
+ if (!CondReg)
+ CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
+
+ // Adjust the condition to have the desired register width by zero-extending
+ // as needed.
+ // FIXME: We should use a better API to avoid the local reference and using a
+ // different variable here.
+ unsigned ExtCondReg = AdjustReg(CondReg);
+
+ // Now we need to turn this into a bitmask. We do this by subtracting it from
+ // zero.
+ unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
+ BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
+ ZeroReg = AdjustReg(ZeroReg);
+
+ unsigned Sub;
+ switch (SetBI.getOpcode()) {
+ case X86::SETB_C8r:
+ Sub = X86::SUB8rr;
+ break;
+
+ case X86::SETB_C16r:
+ Sub = X86::SUB16rr;
+ break;
+
+ case X86::SETB_C32r:
+ Sub = X86::SUB32rr;
+ break;
+
+ case X86::SETB_C64r:
+ Sub = X86::SUB64rr;
+ break;
+
+ default:
+ llvm_unreachable("Invalid SETB_C* opcode!");
+ }
+ unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
+ BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
+ .addReg(ZeroReg)
+ .addReg(ExtCondReg);
+ return RewriteToReg(ResultReg);
+}
+
void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
MachineBasicBlock::iterator TestPos,
DebugLoc TestLoc,
OpenPOWER on IntegriCloud