//===---- X86FixupSetCC.cpp - optimize usage of LEA instructions ----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines a pass that fixes zero-extension of setcc patterns. // X86 setcc instructions are modeled to have no input arguments, and a single // GR8 output argument. This is consistent with other similar instructions // (e.g. movb), but means it is impossible to directly generate a setcc into // the lower GR8 of a specified GR32. // This means that ISel must select (zext (setcc)) into something like // seta %al; movzbl %al, %eax. // Unfortunately, this can cause a stall due to the partial register write // performed by the setcc. Instead, we can use: // xor %eax, %eax; seta %al // This both avoids the stall, and encodes shorter. //===----------------------------------------------------------------------===// #include "X86.h" #include "X86InstrInfo.h" #include "X86Subtarget.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "x86-fixup-setcc" STATISTIC(NumSubstZexts, "Number of setcc + zext pairs substituted"); namespace { class X86FixupSetCCPass : public MachineFunctionPass { public: X86FixupSetCCPass() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "X86 Fixup SetCC"; } bool runOnMachineFunction(MachineFunction &MF) override; private: // Find the preceding instruction that imp-defs eflags. MachineInstr *findFlagsImpDef(MachineBasicBlock *MBB, MachineBasicBlock::reverse_iterator MI); // Return true if MI imp-uses eflags. bool impUsesFlags(MachineInstr *MI); // Return true if this is the opcode of a SetCC instruction with a register // output. bool isSetCCr(unsigned Opode); MachineRegisterInfo *MRI; const X86InstrInfo *TII; enum { SearchBound = 16 }; static char ID; }; char X86FixupSetCCPass::ID = 0; } FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); } // We expect the instruction *immediately* before the setcc to imp-def // EFLAGS (because of scheduling glue). To make this less brittle w.r.t // scheduling, look backwards until we hit the beginning of the // basic-block, or a small bound (to avoid quadratic behavior). MachineInstr * X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB, MachineBasicBlock::reverse_iterator MI) { // FIXME: Should this be instr_rend(), and MI be reverse_instr_iterator? auto MBBStart = MBB->rend(); for (int i = 0; (i < SearchBound) && (MI != MBBStart); ++i, ++MI) for (auto &Op : MI->implicit_operands()) if (Op.isReg() && (Op.getReg() == X86::EFLAGS) && Op.isDef()) return &*MI; return nullptr; } bool X86FixupSetCCPass::impUsesFlags(MachineInstr *MI) { for (auto &Op : MI->implicit_operands()) if (Op.isReg() && (Op.getReg() == X86::EFLAGS) && Op.isUse()) return true; return false; } bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; MRI = &MF.getRegInfo(); TII = MF.getSubtarget().getInstrInfo(); SmallVector ToErase; for (auto &MBB : MF) { for (auto &MI : MBB) { // Find a setcc that is used by a zext. // This doesn't have to be the only use, the transformation is safe // regardless. if (MI.getOpcode() != X86::SETCCr) continue; MachineInstr *ZExt = nullptr; for (auto &Use : MRI->use_instructions(MI.getOperand(0).getReg())) if (Use.getOpcode() == X86::MOVZX32rr8) ZExt = &Use; if (!ZExt) continue; // Find the preceding instruction that imp-defs eflags. MachineInstr *FlagsDefMI = findFlagsImpDef( MI.getParent(), MachineBasicBlock::reverse_iterator(&MI)); if (!FlagsDefMI) continue; // We'd like to put something that clobbers eflags directly before // FlagsDefMI. This can't hurt anything after FlagsDefMI, because // it, itself, by definition, clobbers eflags. But it may happen that // FlagsDefMI also *uses* eflags, in which case the transformation is // invalid. if (impUsesFlags(FlagsDefMI)) continue; ++NumSubstZexts; Changed = true; // On 32-bit, we need to be careful to force an ABCD register. const TargetRegisterClass *RC = MF.getSubtarget().is64Bit() ? &X86::GR32RegClass : &X86::GR32_ABCDRegClass; Register ZeroReg = MRI->createVirtualRegister(RC); Register InsertReg = MRI->createVirtualRegister(RC); // Initialize a register with 0. This must go before the eflags def BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0), ZeroReg); // X86 setcc only takes an output GR8, so fake a GR32 input by inserting // the setcc result into the low byte of the zeroed register. BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(), TII->get(X86::INSERT_SUBREG), InsertReg) .addReg(ZeroReg) .addReg(MI.getOperand(0).getReg()) .addImm(X86::sub_8bit); MRI->replaceRegWith(ZExt->getOperand(0).getReg(), InsertReg); ToErase.push_back(ZExt); } } for (auto &I : ToErase) I->eraseFromParent(); return Changed; }