diff options
author | Alexander Timofeev <Alexander.Timofeev@amd.com> | 2018-09-13 06:34:56 +0000 |
---|---|---|
committer | Alexander Timofeev <Alexander.Timofeev@amd.com> | 2018-09-13 06:34:56 +0000 |
commit | 2fb44808b1e90a0c8300c93cd411850659e536e7 (patch) | |
tree | 648885a486d28be790b936fe2006763518f8cff3 | |
parent | f107123a88fe010ad2695676278fc82668da7034 (diff) | |
download | bcm5719-llvm-2fb44808b1e90a0c8300c93cd411850659e536e7.tar.gz bcm5719-llvm-2fb44808b1e90a0c8300c93cd411850659e536e7.zip |
[AMDGPU] Preliminary patch for divergence-driven instruction selection. Load offset inlining pattern changed.
Differential revision: https://reviews.llvm.org/D51975
Reviewers: rampitec
llvm-svn: 342115
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir | 23 |
2 files changed, 24 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index df3bed4f088..c779dae23ec 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4084,6 +4084,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
         // V_ADD will be removed by "Remove dead machine instructions".
         if (Add &&
             (Add->getOpcode() == AMDGPU::V_ADD_I32_e32 ||
+             Add->getOpcode() == AMDGPU::V_ADD_U32_e32 ||
              Add->getOpcode() == AMDGPU::V_ADD_U32_e64)) {
           static const unsigned SrcNames[2] = {
             AMDGPU::OpName::src0,
diff --git a/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir b/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir
index db8cdd7a5f2..44954f06523 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir
+++ b/llvm/test/CodeGen/AMDGPU/smrd-fold-offset.mir
@@ -21,3 +21,26 @@ body: |
     $vgpr0 = COPY %9
     SI_RETURN_TO_EPILOG $vgpr0
 ...
+
+# GCN: BUFFER_LOAD_DWORD_OFFEN %{{[0-9]+}}, killed %{{[0-9]+}}, 0, 4095
+---
+name: smrd_vgpr_offset_imm_add_u32
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0
+
+    %4:vgpr_32 = COPY $vgpr0
+    %3:sgpr_32 = COPY $sgpr3
+    %2:sgpr_32 = COPY $sgpr2
+    %1:sgpr_32 = COPY $sgpr1
+    %0:sgpr_32 = COPY $sgpr0
+    %5:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
+    %6:sreg_32_xm0 = S_MOV_B32 4095
+    %8:vgpr_32 = COPY %6
+    %7:vgpr_32 = V_ADD_U32_e32 %4, killed %8, implicit $exec
+    %10:sreg_32 = COPY %7
+    %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0 :: (dereferenceable invariant load 4)
+    $vgpr0 = COPY %9
+    SI_RETURN_TO_EPILOG $vgpr0
+
+...
```