summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/R600/AMDGPU.h1
-rw-r--r--llvm/lib/Target/R600/AMDGPUTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/R600/CMakeLists.txt1
-rw-r--r--llvm/lib/Target/R600/SIInstrFormats.td1
-rw-r--r--llvm/lib/Target/R600/SIInstrInfo.cpp9
-rw-r--r--llvm/lib/Target/R600/SIInstrInfo.h6
-rw-r--r--llvm/lib/Target/R600/SIInstrInfo.td9
-rw-r--r--llvm/lib/Target/R600/SIShrinkInstructions.cpp189
8 files changed, 218 insertions, 0 deletions
diff --git a/llvm/lib/Target/R600/AMDGPU.h b/llvm/lib/Target/R600/AMDGPU.h
index 416e050f675..d7e94f75e12 100644
--- a/llvm/lib/Target/R600/AMDGPU.h
+++ b/llvm/lib/Target/R600/AMDGPU.h
@@ -39,6 +39,7 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
FunctionPass *createSITypeRewriter();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSILowerI1CopiesPass();
+FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRLiveRangesPass();
diff --git a/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp b/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
index 23beb2576ac..56ba719e686 100644
--- a/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -176,6 +176,7 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
// SIFixSGPRCopies can generate a lot of duplicate instructions,
// so we need to run MachineCSE afterwards.
addPass(&MachineCSEID);
+ addPass(createSIShrinkInstructionsPass());
initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry());
insertPass(&RegisterCoalescerID, &SIFixSGPRLiveRangesID);
}
@@ -185,6 +186,7 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
bool AMDGPUPassConfig::addPostRegAlloc() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ addPass(createSIShrinkInstructionsPass());
if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
addPass(createSIInsertWaits(*TM));
}
diff --git a/llvm/lib/Target/R600/CMakeLists.txt b/llvm/lib/Target/R600/CMakeLists.txt
index 4d160826004..49a7f8aa18c 100644
--- a/llvm/lib/Target/R600/CMakeLists.txt
+++ b/llvm/lib/Target/R600/CMakeLists.txt
@@ -48,6 +48,7 @@ add_llvm_target(R600CodeGen
SILowerI1Copies.cpp
SIMachineFunctionInfo.cpp
SIRegisterInfo.cpp
+ SIShrinkInstructions.cpp
SITypeRewriter.cpp
)
diff --git a/llvm/lib/Target/R600/SIInstrFormats.td b/llvm/lib/Target/R600/SIInstrFormats.td
index b9b302957f6..d4cee0d751b 100644
--- a/llvm/lib/Target/R600/SIInstrFormats.td
+++ b/llvm/lib/Target/R600/SIInstrFormats.td
@@ -288,6 +288,7 @@ class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
let VOPC = 1;
}
diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp
index 59f10b6ded1..8c3af77e023 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.cpp
+++ b/llvm/lib/Target/R600/SIInstrInfo.cpp
@@ -1639,3 +1639,12 @@ void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}
+
+/// Looks up the operand of \p MI identified by \p OperandName.
+///
+/// \returns a pointer to the operand, or nullptr when the opcode of \p MI has
+/// no operand with that name (getNamedOperandIdx reports -1).
+const MachineOperand *SIInstrInfo::getNamedOperand(const MachineInstr& MI,
+                                                   unsigned OperandName) const {
+  const int OperandIdx =
+      AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
+  return OperandIdx == -1 ? nullptr : &MI.getOperand(OperandIdx);
+}
diff --git a/llvm/lib/Target/R600/SIInstrInfo.h b/llvm/lib/Target/R600/SIInstrInfo.h
index e8b6b6d69f8..13ab4843fda 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.h
+++ b/llvm/lib/Target/R600/SIInstrInfo.h
@@ -174,11 +174,17 @@ public:
unsigned SavReg, unsigned IndexReg) const;
void insertNOPs(MachineBasicBlock::iterator MI, int Count) const;
+
+ /// \brief Returns the operand named \p OperandName. If \p MI does not have
+ /// an operand with that name, this function returns nullptr.
+ const MachineOperand *getNamedOperand(const MachineInstr& MI,
+ unsigned OperandName) const;
};
namespace AMDGPU {
int getVOPe64(uint16_t Opcode);
+ int getVOPe32(uint16_t Opcode);
int getCommuteRev(uint16_t Opcode);
int getCommuteOrig(uint16_t Opcode);
int getMCOpcode(uint16_t Opcode, unsigned Gen);
diff --git a/llvm/lib/Target/R600/SIInstrInfo.td b/llvm/lib/Target/R600/SIInstrInfo.td
index 841d037ca2b..0a624a3e13d 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.td
+++ b/llvm/lib/Target/R600/SIInstrInfo.td
@@ -829,6 +829,15 @@ def getVOPe64 : InstrMapping {
let ValueCols = [["8"]];
}
+// Maps an opcode in e64 form (Size 8) to its e32 equivalent (Size 4)
+def getVOPe32 : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"]; // instructions sharing an OpName form one row
+ let ColFields = ["Size"]; // columns are told apart by the Size field
+ let KeyCol = ["8"]; // the lookup key is the Size-8 entry of a row
+ let ValueCols = [["4"]]; // the result is the Size-4 entry of that row
+}
+
// Maps an original opcode to its commuted version
def getCommuteRev : InstrMapping {
let FilterClass = "VOP2_REV";
diff --git a/llvm/lib/Target/R600/SIShrinkInstructions.cpp b/llvm/lib/Target/R600/SIShrinkInstructions.cpp
new file mode 100644
index 00000000000..362a5c1e4e0
--- /dev/null
+++ b/llvm/lib/Target/R600/SIShrinkInstructions.cpp
@@ -0,0 +1,189 @@
+//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// The pass tries to use the 32-bit encoding for instructions when possible.
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "si-shrink-instructions"
+
+STATISTIC(NumInstructionsShrunk,
+ "Number of 64-bit instruction reduced to 32-bit.");
+
+namespace llvm {
+ void initializeSIShrinkInstructionsPass(PassRegistry&);
+}
+
+using namespace llvm;
+
+namespace {
+
+/// Machine function pass that tries to use the 32-bit encoding for
+/// instructions when possible.
+class SIShrinkInstructions : public MachineFunctionPass {
+public:
+  /// Pass identifier handed to the MachineFunctionPass constructor.
+  static char ID;
+
+  SIShrinkInstructions() : MachineFunctionPass(ID) {}
+
+  /// Name reported for this pass.
+  const char *getPassName() const override {
+    return "SI Shrink Instructions";
+  }
+
+  /// Declares that the CFG is preserved, then defers to the base class.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // End anonymous namespace.
+
+// Registers the pass under DEBUG_TYPE. No dependencies are listed between the
+// BEGIN and END macros. The description matches getPassName().
+INITIALIZE_PASS_BEGIN(SIShrinkInstructions, DEBUG_TYPE,
+                      "SI Shrink Instructions", false, false)
+INITIALIZE_PASS_END(SIShrinkInstructions, DEBUG_TYPE,
+                    "SI Shrink Instructions", false, false)
+
+char SIShrinkInstructions::ID = 0; // Passed to the MachineFunctionPass ctor.
+
+FunctionPass *llvm::createSIShrinkInstructionsPass() { // Factory for the pass.
+ return new SIShrinkInstructions(); // Hands back a newly allocated instance.
+}
+
+/// Returns true when \p MO is a register operand whose register class
+/// contains VGPRs. Virtual registers are classified through \p MRI, physical
+/// registers through \p TRI. Non-register operands yield false.
+static bool isVGPR(const MachineOperand *MO, const SIRegisterInfo &TRI,
+                   const MachineRegisterInfo &MRI) {
+  if (!MO->isReg())
+    return false;
+
+  const unsigned Reg = MO->getReg();
+  const auto *RegClass = TargetRegisterInfo::isVirtualRegister(Reg)
+                             ? MRI.getRegClass(Reg)
+                             : TRI.getPhysRegClass(Reg);
+  return TRI.hasVGPRs(RegClass);
+}
+
+/// Decides whether \p MI can be rewritten in the 32-bit encoding.
+///
+/// The instruction is rejected when it has a src2 operand, when src1 is
+/// present but is not a VGPR or carries a non-zero src1_modifiers value, or
+/// when src0_modifiers, omod or clamp is present with a non-zero value.
+///
+/// \returns true if none of the above applies.
+static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
+                      const SIRegisterInfo &TRI,
+                      const MachineRegisterInfo &MRI) {
+
+  const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+  // Can't shrink instruction with three operands.
+  if (Src2)
+    return false;
+
+  const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+  const MachineOperand *Src1Mod =
+      TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
+
+  // getNamedOperand returns nullptr for operands the opcode does not have, so
+  // src1_modifiers must be checked before it is read, exactly like the other
+  // modifier operands below.
+  if (Src1 && (!isVGPR(Src1, TRI, MRI) ||
+               (Src1Mod && Src1Mod->getImm() != 0)))
+    return false;
+
+  // We don't need to check src0, all input types are legal, so just make
+  // sure src0 isn't using any modifiers.
+  const MachineOperand *Src0Mod =
+      TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
+  if (Src0Mod && Src0Mod->getImm() != 0)
+    return false;
+
+  // Check output modifiers
+  const MachineOperand *Omod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
+  if (Omod && Omod->getImm() != 0)
+    return false;
+
+  const MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
+  return !Clamp || Clamp->getImm() == 0;
+}
+
+/// Walks every instruction in \p MF and replaces those that have a 32-bit
+/// (e32) equivalent and pass canShrink() with that shorter encoding. When the
+/// original operand order does not qualify, the instruction is commuted and
+/// checked again.
+///
+/// VOPC instructions whose result is still a virtual register are not
+/// rewritten; the register only receives an allocation hint for VCC. VOPC
+/// instructions writing a physical register other than VCC are left alone.
+///
+/// \returns true if any instruction was commuted or replaced.
+bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+      MF.getTarget().getInstrInfo());
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  bool Changed = false;
+
+  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+       BI != BE; ++BI) {
+
+    MachineBasicBlock &MBB = *BI;
+    MachineBasicBlock::iterator I, Next;
+    for (I = MBB.begin(); I != MBB.end(); I = Next) {
+      // Fetch the successor up front: MI may be erased further down.
+      Next = std::next(I);
+      MachineInstr &MI = *I;
+
+      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
+
+      if (Op32 == -1)
+        continue;
+
+      if (!canShrink(MI, TII, TRI, MRI)) {
+        // Try commuting the instruction and see if that enables us to shrink
+        // it.
+        if (!MI.isCommutable() || !TII->commuteInstruction(&MI))
+          continue;
+
+        // The commute rewrote MI in place, so the function has been modified
+        // even if the instruction still cannot be shrunk.
+        Changed = true;
+
+        if (!canShrink(MI, TII, TRI, MRI))
+          continue;
+
+        // Commuting may have changed the opcode of MI, so the e32 opcode
+        // looked up earlier can be stale. Look it up again and give up if the
+        // commuted form has no 32-bit equivalent.
+        Op32 = AMDGPU::getVOPe32(MI.getOpcode());
+        if (Op32 == -1)
+          continue;
+      }
+
+      if (TII->isVOPC(Op32)) {
+        unsigned DstReg = MI.getOperand(0).getReg();
+        if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+          // VOPC instructions can only write to the VCC register.  We can't
+          // force them to use VCC here, because the register allocator
+          // has trouble with sequences like this, which cause the allocator
+          // to run out of registers if vreg0 and vreg1 belong to the VCCReg
+          // register class:
+          // vreg0 = VOPC;
+          // vreg1 = VOPC;
+          // S_AND_B64 vreg0, vreg1
+          //
+          // So, instead of forcing the instruction to write to VCC, we provide
+          // a hint to the register allocator to use VCC and then we will run
+          // this pass again after RA and shrink it if it outputs to VCC.
+          MRI.setRegAllocationHint(DstReg, 0, AMDGPU::VCC);
+          continue;
+        }
+        if (DstReg != AMDGPU::VCC)
+          continue;
+      }
+
+      // We can shrink this instruction
+      DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << "\n";);
+
+      // The replacement is inserted immediately before the original.
+      MachineInstrBuilder MIB =
+          BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
+
+      // dst
+      MIB.addOperand(MI.getOperand(0));
+
+      MIB.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
+
+      const MachineOperand *Src1 =
+          TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+      if (Src1)
+        MIB.addOperand(*Src1);
+
+      for (const MachineOperand &MO : MI.implicit_operands())
+        MIB.addOperand(MO);
+
+      // Dump the newly built instruction, not the one about to be erased.
+      DEBUG(dbgs() << "e32 MI = "; MIB->dump(); dbgs() << "\n";);
+      ++NumInstructionsShrunk;
+      MI.eraseFromParent();
+      Changed = true;
+    }
+  }
+  return Changed;
+}
OpenPOWER on IntegriCloud