author     Nicolai Haehnle <nhaehnle@gmail.com>    2018-07-30 09:23:59 +0000
committer  Nicolai Haehnle <nhaehnle@gmail.com>    2018-07-30 09:23:59 +0000
commit     7f0d05d53219716394760dd16af9562d36ab96d6 (patch)
tree       93c11edced8112089d1101876a7b8743876b4051 /llvm/lib/Target
parent     a692120cb76b25d1a683f7479b7549b455015951 (diff)
AMDGPU: Force skip over s_sendmsg and exp instructions
Summary:
These instructions interact with hardware blocks outside the shader core,
and they can have "scalar" side effects even when EXEC = 0. We don't want
these scalar side effects to occur when all lanes want to skip these
instructions, so always add the execz skip branch instruction for basic
blocks that contain them.

Also ensure that we skip scalar stores / atomics, though we don't
code-gen those yet.

Reviewers: arsenm, rampitec

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D48431

Change-Id: Ieaeb58352e2789ffd64745603c14970c60819d44
llvm-svn: 338235
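For illustration only, the decision this patch encodes can be sketched outside of LLVM's data structures. The C++ model below is a minimal, self-contained sketch: the Op enum, the Instr struct, and the threshold parameter are hypothetical stand-ins for MachineInstr, the AMDGPU opcode enumeration, and the pass's skip-threshold option; it is not the code added by the patch itself.

#include <vector>

// Hypothetical stand-ins for MachineInstr and the AMDGPU opcode enum.
enum class Op {
  S_CBRANCH_VCCNZ, S_CBRANCH_VCCZ,
  S_SENDMSG, S_SENDMSGHALT,
  EXP, EXP_DONE,
  V_READFIRSTLANE_B32, V_READLANE_B32,
  OTHER
};

struct Instr {
  Op Opcode = Op::OTHER;
  bool IsInlineAsm = false;
  bool MayStore = false;  // scalar stores / atomics (not code-gen'd yet)
  bool IsSMRD = false;
};

// Mirrors the intent of the new SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty:
// instructions whose side effects must not run when all lanes are inactive.
bool hasUnwantedEffectsWhenEXECEmpty(const Instr &MI) {
  if (MI.MayStore && MI.IsSMRD)
    return true; // scalar store or atomic
  switch (MI.Opcode) {
  case Op::S_SENDMSG:
  case Op::S_SENDMSGHALT:
  case Op::EXP:
  case Op::EXP_DONE:            // shader I/O; may lock up hardware with EXEC = 0
  case Op::V_READFIRSTLANE_B32:
  case Op::V_READLANE_B32:      // would operate on undefined data with EXEC = 0
    return true;
  default:
    break;
  }
  return MI.IsInlineAsm;        // conservative assumption
}

// Mirrors the simplified SIInsertSkips::shouldSkip loop: the execz skip branch
// is kept if the region is large or contains any of the instructions above.
bool shouldSkip(const std::vector<Instr> &Block, unsigned SkipThreshold) {
  unsigned NumInstr = 0;
  for (const Instr &I : Block) {
    if (I.Opcode == Op::S_CBRANCH_VCCNZ || I.Opcode == Op::S_CBRANCH_VCCZ)
      return true;
    if (hasUnwantedEffectsWhenEXECEmpty(I))
      return true;
    if (++NumInstr >= SkipThreshold)
      return true;
  }
  return false;
}

In this model, a block containing a single EXP or S_SENDMSG makes shouldSkip return true regardless of the size threshold, which is the behaviour change described in the summary.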
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInsertSkips.cpp  22
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp    30
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h       3
3 files changed, 35 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index 61c8f359e16..dc9397cf7b8 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -133,28 +133,10 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
          I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
        return true;
-      // V_READFIRSTLANE/V_READLANE destination register may be used as operand
-      // by some SALU instruction. If exec mask is zero vector instruction
-      // defining the register that is used by the scalar one is not executed
-      // and scalar instruction will operate on undefined data. For
-      // V_READFIRSTLANE/V_READLANE we should avoid predicated execution.
-      if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) ||
-          (I->getOpcode() == AMDGPU::V_READLANE_B32)) {
+      if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
        return true;
-      }
-
-      if (I->isInlineAsm()) {
-        const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
-        const char *AsmStr = I->getOperand(0).getSymbolName();
-
-        // inlineasm length estimate is number of bytes assuming the longest
-        // instruction.
-        uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
-        NumInstr += MaxAsmSize / MAI->getMaxInstLength();
-      } else {
-        ++NumInstr;
-      }
+      ++NumInstr;
      if (NumInstr >= SkipThreshold)
        return true;
    }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6c85c92454c..f3745382a6f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2332,6 +2332,36 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
         changesVGPRIndexingMode(MI);
}
+bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
+  unsigned Opcode = MI.getOpcode();
+
+  if (MI.mayStore() && isSMRD(MI))
+    return true; // scalar store or atomic
+
+  // These instructions cause shader I/O that may cause hardware lockups
+  // when executed with an empty EXEC mask.
+  //
+  // Note: exp with VM = DONE = 0 is automatically skipped by hardware when
+  // EXEC = 0, but checking for that case here seems not worth it
+  // given the typical code patterns.
+  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
+      Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE)
+    return true;
+
+  if (MI.isInlineAsm())
+    return true; // conservative assumption
+
+  // These are like SALU instructions in terms of effects, so it's questionable
+  // whether we should return true for those.
+  //
+  // However, executing them with EXEC = 0 causes them to operate on undefined
+  // data, which we avoid by returning true here.
+  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 || Opcode == AMDGPU::V_READLANE_B32)
+    return true;
+
+  return false;
+}
+
bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
  switch (Imm.getBitWidth()) {
  case 32:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 0a735257d34..d681b926504 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -597,6 +597,9 @@ public:
    return !RI.isSGPRReg(MRI, Dest);
  }
+  /// Whether we must prevent this instruction from executing with EXEC = 0.
+  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
+
  bool isInlineConstant(const APInt &Imm) const;
  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;