| field | value | date |
|---|---|---|
| author | Nicolai Haehnle <nhaehnle@gmail.com> | 2018-07-30 09:23:59 +0000 |
| committer | Nicolai Haehnle <nhaehnle@gmail.com> | 2018-07-30 09:23:59 +0000 |
| commit | 7f0d05d53219716394760dd16af9562d36ab96d6 (patch) | |
| tree | 93c11edced8112089d1101876a7b8743876b4051 /llvm/lib/Target | |
| parent | a692120cb76b25d1a683f7479b7549b455015951 (diff) | |
| download | bcm5719-llvm-7f0d05d53219716394760dd16af9562d36ab96d6.tar.gz, bcm5719-llvm-7f0d05d53219716394760dd16af9562d36ab96d6.zip | |
AMDGPU: Force skip over s_sendmsg and exp instructions
Summary:
These instructions interact with hardware blocks outside the shader core,
and they can have "scalar" side effects even when EXEC = 0. We don't
want these scalar side effects to occur when all lanes want to skip
these instructions, so always add the execz skip branch instruction
for basic blocks that contain them.
Also ensure that we skip scalar stores / atomics, though we don't
code-gen those yet.
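
As an illustration of the rule this patch introduces, here is a minimal standalone sketch (plain C++, not LLVM code) of the classification that the new SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty() performs. The InstrModel struct and the string opcodes are illustrative stand-ins for the real MachineInstr queries and AMDGPU opcode enumerators; the actual implementation is in the diff below.

```cpp
#include <iostream>
#include <string>

// Minimal stand-in for a machine instruction: plain strings replace the real
// AMDGPU opcode enumerators, and the flags replace the MachineInstr queries.
struct InstrModel {
  std::string Opcode;
  bool MayStore;     // MI.mayStore() in the real code
  bool IsSMRD;       // SIInstrInfo::isSMRD(MI) in the real code
  bool IsInlineAsm;  // MI.isInlineAsm() in the real code
};

// Model of the new predicate: true if the instruction must not be stepped
// over implicitly, i.e. the execz skip branch has to be emitted.
bool hasUnwantedEffectsWhenEXECEmpty(const InstrModel &MI) {
  // Scalar stores / atomics have "scalar" side effects even when EXEC = 0.
  if (MI.MayStore && MI.IsSMRD)
    return true;

  // Messages and exports talk to hardware blocks outside the shader core and
  // may lock up the hardware if issued with an empty EXEC mask.
  if (MI.Opcode == "S_SENDMSG" || MI.Opcode == "S_SENDMSGHALT" ||
      MI.Opcode == "EXP" || MI.Opcode == "EXP_DONE")
    return true;

  // Inline assembly: assume the worst.
  if (MI.IsInlineAsm)
    return true;

  // Lane reads would feed undefined data to SALU users when no lane is live.
  if (MI.Opcode == "V_READFIRSTLANE_B32" || MI.Opcode == "V_READLANE_B32")
    return true;

  return false;
}

int main() {
  std::cout << hasUnwantedEffectsWhenEXECEmpty({"S_SENDMSG", false, false, false})
            << '\n';  // 1: must not be skipped over implicitly
  std::cout << hasUnwantedEffectsWhenEXECEmpty({"V_ADD_F32_e32", false, false, false})
            << '\n';  // 0: a plain VALU op is a no-op with EXEC = 0
  return 0;
}
```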
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D48431
Change-Id: Ieaeb58352e2789ffd64745603c14970c60819d44
llvm-svn: 338235
Diffstat (limited to 'llvm/lib/Target')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertSkips.cpp | 22 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 30 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3 |
3 files changed, 35 insertions, 20 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index 61c8f359e16..dc9397cf7b8 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -133,28 +133,10 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
           I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
         return true;
 
-      // V_READFIRSTLANE/V_READLANE destination register may be used as operand
-      // by some SALU instruction. If exec mask is zero vector instruction
-      // defining the register that is used by the scalar one is not executed
-      // and scalar instruction will operate on undefined data. For
-      // V_READFIRSTLANE/V_READLANE we should avoid predicated execution.
-      if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) ||
-          (I->getOpcode() == AMDGPU::V_READLANE_B32)) {
+      if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
         return true;
-      }
-
-      if (I->isInlineAsm()) {
-        const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
-        const char *AsmStr = I->getOperand(0).getSymbolName();
-
-        // inlineasm length estimate is number of bytes assuming the longest
-        // instruction.
-        uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
-        NumInstr += MaxAsmSize / MAI->getMaxInstLength();
-      } else {
-        ++NumInstr;
-      }
 
+      ++NumInstr;
       if (NumInstr >= SkipThreshold)
         return true;
     }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6c85c92454c..f3745382a6f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2332,6 +2332,36 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
          changesVGPRIndexingMode(MI);
 }
 
+bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
+  unsigned Opcode = MI.getOpcode();
+
+  if (MI.mayStore() && isSMRD(MI))
+    return true; // scalar store or atomic
+
+  // These instructions cause shader I/O that may cause hardware lockups
+  // when executed with an empty EXEC mask.
+  //
+  // Note: exp with VM = DONE = 0 is automatically skipped by hardware when
+  //       EXEC = 0, but checking for that case here seems not worth it
+  //       given the typical code patterns.
+  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
+      Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE)
+    return true;
+
+  if (MI.isInlineAsm())
+    return true; // conservative assumption
+
+  // These are like SALU instructions in terms of effects, so it's questionable
+  // whether we should return true for those.
+  //
+  // However, executing them with EXEC = 0 causes them to operate on undefined
+  // data, which we avoid by returning true here.
+  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || Opcode == AMDGPU::V_READLANE_B32)
+    return true;
+
+  return false;
+}
+
 bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
   switch (Imm.getBitWidth()) {
   case 32:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 0a735257d34..d681b926504 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -597,6 +597,9 @@ public:
     return !RI.isSGPRReg(MRI, Dest);
   }
 
+  /// Whether we must prevent this instruction from executing with EXEC = 0.
+  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
+
   bool isInlineConstant(const APInt &Imm) const;
 
   bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
```
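
For context, here is a minimal standalone sketch (again plain C++, not LLVM code) of how SIInsertSkips::shouldSkip() consumes the new predicate after this change: the execz skip branch becomes mandatory as soon as the skipped-over region contains an instruction that must not run with EXEC = 0, and is otherwise only inserted once the region exceeds a size threshold. The SkipInfo struct and the threshold default of 12 are assumptions made for illustration, not the pass's exact interface.

```cpp
#include <vector>

// Minimal stand-in for a machine instruction: the only property this loop
// cares about is whether the predicate flagged it as unsafe to step over
// implicitly when EXEC = 0.
struct SkipInfo {
  bool UnsafeWithExecZero;
};

// Model of the rewritten shouldSkip() loop: force the execz branch if any
// instruction in the region has EXEC=0 side effects, otherwise insert it only
// when the region is long enough that branching is the cheaper option.
bool shouldSkipModel(const std::vector<SkipInfo> &Region,
                     unsigned SkipThreshold = 12) {
  unsigned NumInstr = 0;
  for (const SkipInfo &MI : Region) {
    if (MI.UnsafeWithExecZero)
      return true;
    if (++NumInstr >= SkipThreshold)
      return true;
  }
  return false;
}

int main() {
  // A short region containing e.g. an export still gets the skip branch,
  // even though it is well below the size threshold.
  std::vector<SkipInfo> Region = {{false}, {true}, {false}};
  return shouldSkipModel(Region) ? 0 : 1;
}
```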

