diff options
| author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2017-09-06 15:31:30 +0000 |
|---|---|---|
| committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2017-09-06 15:31:30 +0000 |
| commit | 949fac9e40bf6dce04220a7a9b53bb36727e7f56 (patch) | |
| tree | 46c9762040e42231047f91a883b1e948cc0f0676 | |
| parent | 8816a87064e9043cf12b49c19241d513ae94325f (diff) | |
| download | bcm5719-llvm-949fac9e40bf6dce04220a7a9b53bb36727e7f56.tar.gz bcm5719-llvm-949fac9e40bf6dce04220a7a9b53bb36727e7f56.zip | |
[AMDGPU] Fix shouldClusterMemOps to process flat loads
Flat loads do not have a vdata operand; they have a vdst operand instead.
Differential Revision: https://reviews.llvm.org/D37502
llvm-svn: 312640
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir | 20 |
2 files changed, 24 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ad9deefc1e7..f7f6d52e751 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -366,7 +366,11 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
       (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
       (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
     FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
+    if (!FirstDst)
+      FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
     SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
+    if (!SecondDst)
+      SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
   } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
     FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst);
     SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst);
diff --git a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
new file mode 100644
index 00000000000..50caba213d3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir
@@ -0,0 +1,20 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass machine-scheduler %s -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: cluster_flat_loads
+# GCN: FLAT_LOAD_DWORD %0, 0
+# GCN-NEXT: FLAT_LOAD_DWORD %0, 4
+# GCN-NEXT: V_ADD_F32_e64
+name: cluster_flat_loads
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vreg_64 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: vgpr_32 }
+body: |
+  bb.0:
+    %0 = IMPLICIT_DEF
+    %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
+    %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit %exec
+    %3 = FLAT_LOAD_DWORD %0, 4, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
+...

