diff options
author | Alexander Timofeev <Alexander.Timofeev@amd.com> | 2018-09-11 11:56:50 +0000 |
---|---|---|
committer | Alexander Timofeev <Alexander.Timofeev@amd.com> | 2018-09-11 11:56:50 +0000 |
commit | db7ee7660a8efce83ad34a3b211f5d3e624a3afd (patch) | |
tree | 3ceac940a3c81557e70c188ad1d660636c326f3c /llvm/lib/Target | |
parent | ae3cfeb3ad0e4d3d8846d83c4b75e2933f043800 (diff) | |
download | bcm5719-llvm-db7ee7660a8efce83ad34a3b211f5d3e624a3afd.tar.gz bcm5719-llvm-db7ee7660a8efce83ad34a3b211f5d3e624a3afd.zip |
[AMDGPU] Preliminary patch for divergence driven instruction selection. Immediate selection predicate changed
Differential revision: https://reviews.llvm.org/D51734
Reviewers: rampitec
llvm-svn: 341928
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 52 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 19 |
4 files changed, 62 insertions, 21 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index c95776d0982..d689d92d96f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -101,7 +101,7 @@ private: std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const; bool isNoNanSrc(SDValue N) const; bool isInlineImmediate(const SDNode *N) const; - + bool isVGPRImm(const SDNode *N) const; bool isUniformBr(const SDNode *N) const; MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const; @@ -2068,6 +2068,56 @@ bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const { return isExtractHiElt(In, Src); } +bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const { + if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) { + return false; + } + const SIRegisterInfo *SIRI = + static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo()); + const SIInstrInfo * SII = + static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); + + unsigned Limit = 0; + bool AllUsesAcceptSReg = true; + for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); + Limit < 10 && U != E; ++U, ++Limit) { + const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); + + // If the register class is unknown, it could be an unknown + // register class that needs to be an SGPR, e.g. an inline asm + // constraint + if (!RC || SIRI->isSGPRClass(RC)) + return false; + + if (RC != &AMDGPU::VS_32RegClass) { + AllUsesAcceptSReg = false; + SDNode * User = *U; + if (User->isMachineOpcode()) { + unsigned Opc = User->getMachineOpcode(); + MCInstrDesc Desc = SII->get(Opc); + if (Desc.isCommutable()) { + unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo(); + unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex; + if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) { + unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs(); + const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo); + if (CommutedRC == &AMDGPU::VS_32RegClass) + AllUsesAcceptSReg = true; + } + } + } + // If "AllUsesAcceptSReg == false" so far we haven't suceeded + // commuting current user. This means have at least one use + // that strictly require VGPR. Thus, we will not attempt to commute + // other user instructions. + if (!AllUsesAcceptSReg) + break; + } + } + return !AllUsesAcceptSReg && (Limit < 10); +} + + void AMDGPUDAGToDAGISel::PostprocessISelDAG() { const AMDGPUTargetLowering& Lowering = *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9b870fb8453..df3bed4f088 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1421,10 +1421,15 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, // TargetInstrInfo::commuteInstruction uses it. bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const { - if (!MI.isCommutable()) + return findCommutedOpIndices(MI.getDesc(), SrcOpIdx0, SrcOpIdx1); +} + +bool SIInstrInfo::findCommutedOpIndices(MCInstrDesc Desc, unsigned &SrcOpIdx0, + unsigned &SrcOpIdx1) const { + if (!Desc.isCommutable()) return false; - unsigned Opc = MI.getOpcode(); + unsigned Opc = Desc.getOpcode(); int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); if (Src0Idx == -1) return false; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 1a7fc02ef6f..4685441fab3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -227,6 +227,9 @@ public: bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; + bool findCommutedOpIndices(MCInstrDesc Desc, unsigned & SrcOpIdx0, + unsigned & SrcOpIdx1) const; + bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 27bbaf3091b..0a53b0d776e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -495,24 +495,7 @@ class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{ }]>; class VGPRImm <dag frag> : PatLeaf<frag, [{ - if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) { - return false; - } - const SIRegisterInfo *SIRI = - static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo()); - unsigned Limit = 0; - for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); - Limit < 10 && U != E; ++U, ++Limit) { - const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); - - // If the register class is unknown, it could be an unknown - // register class that needs to be an SGPR, e.g. an inline asm - // constraint - if (!RC || SIRI->isSGPRClass(RC)) - return false; - } - - return Limit < 10; + return isVGPRImm(N); }]>; def NegateImm : SDNodeXForm<imm, [{ |