diff options
| author | Alexander Timofeev <Alexander.Timofeev@amd.com> | 2017-10-03 18:55:36 +0000 | 
|---|---|---|
| committer | Alexander Timofeev <Alexander.Timofeev@amd.com> | 2017-10-03 18:55:36 +0000 | 
| commit | 46513965842fc117f3afb0c8ee2fb0fd329d78ae (patch) | |
| tree | be3f041b4a092a1a5ee8b5fb7c3dc3600fac7b96 | |
| parent | 6b1be121c04b52ae105beee5065fc4b582f13388 (diff) | |
| download | bcm5719-llvm-46513965842fc117f3afb0c8ee2fb0fd329d78ae.tar.gz bcm5719-llvm-46513965842fc117f3afb0c8ee2fb0fd329d78ae.zip  | |
[AMDGPU] Avoid predicated execution of the basic blocks containing scalar
instructions.
Differential revision: https://reviews.llvm.org/D38293
llvm-svn: 314828
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertSkips.cpp | 10 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/readlane_exec0.mir | 32 | 
2 files changed, 42 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index ba346d2fad0..9bd58c45ce0 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -132,6 +132,16 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
           I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
         return true;
 
+      // V_READFIRSTLANE/V_READLANE destination register may be used as operand
+      // by some SALU instruction. If exec mask is zero vector instruction
+      // defining the register that is used by the scalar one is not executed
+      // and scalar instruction will operate on undefined data. For
+      // V_READFIRSTLANE/V_READLANE we should avoid predicated execution.
+      if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) ||
+          (I->getOpcode() == AMDGPU::V_READLANE_B32)) {
+        return true;
+      }
+
       if (I->isInlineAsm()) {
         const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
         const char *AsmStr = I->getOperand(0).getSymbolName();
diff --git a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
new file mode 100644
index 00000000000..b6d58d74ebd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
@@ -0,0 +1,32 @@
+# RUN: llc -o - %s -march=amdgcn -mcpu=fiji  -run-pass=si-insert-skips -verify-machineinstrs | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: readlane_exec0
+# GCN: bb.0
+# GCN: S_CBRANCH_EXECZ %bb.2
+
+---
+name: readlane_exec0
+
+body:       |
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: %vgpr1_vgpr2:0x00000001, %vgpr2_vgpr3:0x00000003
+
+    %vgpr4 = V_AND_B32_e32 1, %vgpr1, implicit %exec
+    V_CMP_EQ_U32_e32 1, killed %vgpr4, implicit-def %vcc, implicit %exec
+    %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
+    SI_MASK_BRANCH %bb.2, implicit %exec
+    S_BRANCH %bb.1
+
+  bb.1:
+
+   %sgpr10 = V_READFIRSTLANE_B32 %vgpr2, implicit %exec
+   %sgpr11 = V_READFIRSTLANE_B32 %vgpr3, implicit %exec
+   %sgpr10 = S_LOAD_DWORD_IMM killed %sgpr10_sgpr11, 0, 0
+   S_WAITCNT 127
+   %vgpr0 = V_XOR_B32_e32 killed %sgpr10, killed %vgpr0, implicit %exec
+
+  bb.2:
+
+    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
+...
```

