Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/R600/AMDGPU.h                 |   1
-rw-r--r-- | llvm/lib/Target/R600/AMDGPUTargetMachine.cpp  |   2
-rw-r--r-- | llvm/lib/Target/R600/CMakeLists.txt           |   1
-rw-r--r-- | llvm/lib/Target/R600/SIInstrFormats.td        |   1
-rw-r--r-- | llvm/lib/Target/R600/SIInstrInfo.cpp          |   9
-rw-r--r-- | llvm/lib/Target/R600/SIInstrInfo.h            |   6
-rw-r--r-- | llvm/lib/Target/R600/SIInstrInfo.td           |   9
-rw-r--r-- | llvm/lib/Target/R600/SIShrinkInstructions.cpp | 189
8 files changed, 218 insertions, 0 deletions
diff --git a/llvm/lib/Target/R600/AMDGPU.h b/llvm/lib/Target/R600/AMDGPU.h
index 416e050f675..d7e94f75e12 100644
--- a/llvm/lib/Target/R600/AMDGPU.h
+++ b/llvm/lib/Target/R600/AMDGPU.h
@@ -39,6 +39,7 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
 FunctionPass *createSITypeRewriter();
 FunctionPass *createSIAnnotateControlFlowPass();
 FunctionPass *createSILowerI1CopiesPass();
+FunctionPass *createSIShrinkInstructionsPass();
 FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
 FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
 FunctionPass *createSIFixSGPRLiveRangesPass();
diff --git a/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp b/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
index 23beb2576ac..56ba719e686 100644
--- a/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -176,6 +176,7 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
     // SIFixSGPRCopies can generate a lot of duplicate instructions,
     // so we need to run MachineCSE afterwards.
     addPass(&MachineCSEID);
+    addPass(createSIShrinkInstructionsPass());
     initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry());
     insertPass(&RegisterCoalescerID, &SIFixSGPRLiveRangesID);
   }
@@ -185,6 +186,7 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
 bool AMDGPUPassConfig::addPostRegAlloc() {
   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
 
+  addPass(createSIShrinkInstructionsPass());
   if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
     addPass(createSIInsertWaits(*TM));
   }
diff --git a/llvm/lib/Target/R600/CMakeLists.txt b/llvm/lib/Target/R600/CMakeLists.txt
index 4d160826004..49a7f8aa18c 100644
--- a/llvm/lib/Target/R600/CMakeLists.txt
+++ b/llvm/lib/Target/R600/CMakeLists.txt
@@ -48,6 +48,7 @@ add_llvm_target(R600CodeGen
   SILowerI1Copies.cpp
   SIMachineFunctionInfo.cpp
   SIRegisterInfo.cpp
+  SIShrinkInstructions.cpp
   SITypeRewriter.cpp
   )
diff --git a/llvm/lib/Target/R600/SIInstrFormats.td b/llvm/lib/Target/R600/SIInstrFormats.td
index b9b302957f6..d4cee0d751b 100644
--- a/llvm/lib/Target/R600/SIInstrFormats.td
+++ b/llvm/lib/Target/R600/SIInstrFormats.td
@@ -288,6 +288,7 @@ class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
+  let UseNamedOperandTable = 1;
   let VOPC = 1;
 }
diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp
index 59f10b6ded1..8c3af77e023 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.cpp
+++ b/llvm/lib/Target/R600/SIInstrInfo.cpp
@@ -1639,3 +1639,12 @@ void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
   for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
     Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
 }
+
+const MachineOperand *SIInstrInfo::getNamedOperand(const MachineInstr &MI,
+                                                   unsigned OperandName) const {
+  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
+  if (Idx == -1)
+    return nullptr;
+
+  return &MI.getOperand(Idx);
+}
diff --git a/llvm/lib/Target/R600/SIInstrInfo.h b/llvm/lib/Target/R600/SIInstrInfo.h
index e8b6b6d69f8..13ab4843fda 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.h
+++ b/llvm/lib/Target/R600/SIInstrInfo.h
@@ -174,11 +174,17 @@ public:
                               unsigned SavReg, unsigned IndexReg) const;
 
   void insertNOPs(MachineBasicBlock::iterator MI, int Count) const;
+
+  /// \brief Returns the operand named \p OperandName. If \p MI does not
+  /// have such an operand, this function returns nullptr.
+  const MachineOperand *getNamedOperand(const MachineInstr &MI,
+                                        unsigned OperandName) const;
 };
 
 namespace AMDGPU {
 
   int getVOPe64(uint16_t Opcode);
+  int getVOPe32(uint16_t Opcode);
   int getCommuteRev(uint16_t Opcode);
   int getCommuteOrig(uint16_t Opcode);
   int getMCOpcode(uint16_t Opcode, unsigned Gen);
diff --git a/llvm/lib/Target/R600/SIInstrInfo.td b/llvm/lib/Target/R600/SIInstrInfo.td
index 841d037ca2b..0a624a3e13d 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.td
+++ b/llvm/lib/Target/R600/SIInstrInfo.td
@@ -829,6 +829,15 @@ def getVOPe64 : InstrMapping {
   let ValueCols = [["8"]];
 }
 
+// Maps an opcode in e64 form to its e32 equivalent
+def getVOPe32 : InstrMapping {
+  let FilterClass = "VOP";
+  let RowFields = ["OpName"];
+  let ColFields = ["Size"];
+  let KeyCol = ["8"];
+  let ValueCols = [["4"]];
+}
+
 // Maps an original opcode to its commuted version
 def getCommuteRev : InstrMapping {
   let FilterClass = "VOP2_REV";
diff --git a/llvm/lib/Target/R600/SIShrinkInstructions.cpp b/llvm/lib/Target/R600/SIShrinkInstructions.cpp
new file mode 100644
index 00000000000..362a5c1e4e0
--- /dev/null
+++ b/llvm/lib/Target/R600/SIShrinkInstructions.cpp
@@ -0,0 +1,189 @@
+//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// The pass tries to use the 32-bit encoding for instructions when possible.
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "si-shrink-instructions"
+
+STATISTIC(NumInstructionsShrunk,
+          "Number of 64-bit instructions reduced to 32-bit.");
+
+namespace llvm {
+  void initializeSIShrinkInstructionsPass(PassRegistry&);
+}
+
+using namespace llvm;
+
+namespace {
+
+class SIShrinkInstructions : public MachineFunctionPass {
+public:
+  static char ID;
+
+public:
+  SIShrinkInstructions() : MachineFunctionPass(ID) {
+  }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF) override;
+
+  virtual const char *getPassName() const override {
+    return "SI Shrink Instructions";
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIShrinkInstructions, DEBUG_TYPE,
+                      "SI Shrink Instructions", false, false)
+INITIALIZE_PASS_END(SIShrinkInstructions, DEBUG_TYPE,
+                    "SI Shrink Instructions", false, false)
+
+char SIShrinkInstructions::ID = 0;
+
+FunctionPass *llvm::createSIShrinkInstructionsPass() {
+  return new SIShrinkInstructions();
+}
+
+static bool isVGPR(const MachineOperand *MO, const SIRegisterInfo &TRI,
+                   const MachineRegisterInfo &MRI) {
+  if (!MO->isReg())
+    return false;
+
+  if (TargetRegisterInfo::isVirtualRegister(MO->getReg()))
+    return TRI.hasVGPRs(MRI.getRegClass(MO->getReg()));
+
+  return TRI.hasVGPRs(TRI.getPhysRegClass(MO->getReg()));
+}
+
+static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
+                      const SIRegisterInfo &TRI,
+                      const MachineRegisterInfo &MRI) {
+
+  const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+  // Can't shrink instructions with three operands.
+  if (Src2)
+    return false;
+
+  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+  const MachineOperand *Src1Mod =
+      TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
+
+  if (Src1 && (!isVGPR(Src1, TRI, MRI) || Src1Mod->getImm() != 0))
+    return false;
+
+  // We don't need to check src0, all input types are legal, so just make
+  // sure src0 isn't using any modifiers.
+  const MachineOperand *Src0Mod =
+      TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
+  if (Src0Mod && Src0Mod->getImm() != 0)
+    return false;
+
+  // Check output modifiers
+  const MachineOperand *Omod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
+  if (Omod && Omod->getImm() != 0)
+    return false;
+
+  const MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
+  return !Clamp || Clamp->getImm() == 0;
+}
+
+bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+      MF.getTarget().getInstrInfo());
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  std::vector<unsigned> I1Defs;
+
+  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+       BI != BE; ++BI) {
+
+    MachineBasicBlock &MBB = *BI;
+    MachineBasicBlock::iterator I, Next;
+    for (I = MBB.begin(); I != MBB.end(); I = Next) {
+      Next = std::next(I);
+      MachineInstr &MI = *I;
+
+      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
+
+      if (Op32 == -1)
+        continue;
+
+      if (!canShrink(MI, TII, TRI, MRI)) {
+        // Try commuting the instruction and see if that enables us to shrink
+        // it.
+        if (!MI.isCommutable() || !TII->commuteInstruction(&MI) ||
+            !canShrink(MI, TII, TRI, MRI))
+          continue;
+      }
+
+      if (TII->isVOPC(Op32)) {
+        unsigned DstReg = MI.getOperand(0).getReg();
+        if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+          // VOPC instructions can only write to the VCC register. We can't
+          // force them to use VCC here, because the register allocator has
+          // trouble with sequences like this, which cause the allocator to
+          // run out of registers if vreg0 and vreg1 belong to the VCCReg
+          // register class:
+          //   vreg0 = VOPC;
+          //   vreg1 = VOPC;
+          //   S_AND_B64 vreg0, vreg1
+          //
+          // So, instead of forcing the instruction to write to VCC, we
+          // provide a hint to the register allocator to use VCC, and then we
+          // will run this pass again after RA and shrink the instruction if
+          // it outputs to VCC.
+          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
+          continue;
+        }
+        if (DstReg != AMDGPU::VCC)
+          continue;
+      }
+
+      // We can shrink this instruction
+      DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << "\n";);
+
+      MachineInstrBuilder MIB =
+          BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
+
+      // dst
+      MIB.addOperand(MI.getOperand(0));
+
+      MIB.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
+
+      const MachineOperand *Src1 =
+          TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+      if (Src1)
+        MIB.addOperand(*Src1);
+
+      for (const MachineOperand &MO : MI.implicit_operands())
+        MIB.addOperand(MO);
+
+      DEBUG(dbgs() << "e32 MI = "; MI.dump(); dbgs() << "\n";);
+      ++NumInstructionsShrunk;
+      MI.eraseFromParent();
+    }
+  }
+  return false;
+}
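
For context only, not part of the commit: a minimal sketch of how the new SIInstrInfo::getNamedOperand() helper is meant to be queried, mirroring the src2 check in canShrink() above. The helper name hasVOP3OnlyOperands is hypothetical, and the sketch assumes the R600 backend headers are on the include path and that TII and MI come from inside a machine function pass.

// Illustrative sketch only; hasVOP3OnlyOperands is not part of the diff.
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"

// Returns true if MI carries a third source operand, i.e. it only exists in
// the 64-bit (VOP3) encoding and cannot be shrunk to an e32 form.
static bool hasVOP3OnlyOperands(const llvm::SIInstrInfo *TII,
                                const llvm::MachineInstr &MI) {
  // getNamedOperand() returns nullptr when the opcode has no operand with
  // the requested name, so a null check is enough here.
  return TII->getNamedOperand(MI, llvm::AMDGPU::OpName::src2) != nullptr;
}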