diff options
| author | Tom Stellard <thomas.stellard@amd.com> | 2016-10-26 14:38:47 +0000 |
|---|---|---|
| committer | Tom Stellard <thomas.stellard@amd.com> | 2016-10-26 14:38:47 +0000 |
| commit | f8e6eaff6e6ef353736cfedf43abcd158ec43e34 (patch) | |
| tree | 18e756e9498f6b5a0c072c076448443e32c750f5 | |
| parent | 9daed22b040cd22f0eb5d9fab1815ec7d4f7143d (diff) | |
| download | bcm5719-llvm-f8e6eaff6e6ef353736cfedf43abcd158ec43e34.tar.gz bcm5719-llvm-f8e6eaff6e6ef353736cfedf43abcd158ec43e34.zip | |
AMDGPU/SI: Don't emit multi-dword flat memory ops when they might access scratch
Summary:
A single flat memory operation that might access the scratch buffer
can only access MaxPrivateElementSize bytes.
Reviewers: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D25788
llvm-svn: 285198
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/flat-address-space.ll | 27 |
2 files changed, 41 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 39486abe5e6..3b84e386341 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2591,6 +2591,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(Ops, DL); } + MachineFunction &MF = DAG.getMachineFunction(); + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + // If there is a possibilty that flat instruction access scratch memory + // then we need to use the same legalization rules we use for private. + if (AS == AMDGPUAS::FLAT_ADDRESS) + AS = MFI->hasFlatScratchInit() ? + AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS; + unsigned NumElements = MemVT.getVectorNumElements(); switch (AS) { case AMDGPUAS::CONSTANT_ADDRESS: @@ -2890,6 +2898,14 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return expandUnalignedStore(Store, DAG); } + MachineFunction &MF = DAG.getMachineFunction(); + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + // If there is a possibilty that flat instruction access scratch memory + // then we need to use the same legalization rules we use for private. + if (AS == AMDGPUAS::FLAT_ADDRESS) + AS = MFI->hasFlatScratchInit() ? 
+ AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS; + unsigned NumElements = VT.getVectorNumElements(); switch (AS) { case AMDGPUAS::GLOBAL_ADDRESS: diff --git a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll index 1b6d8dc533a..0cfe6888b33 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-address-space.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-address-space.ll @@ -1,5 +1,6 @@ -; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck %s -; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga < %s | FileCheck %s +; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck %s +; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga < %s | FileCheck %s +; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=CHECK,HSA %s ; Disable optimizations in case there are optimizations added that ; specialize away generic pointer accesses. @@ -149,6 +150,28 @@ define void @flat_scratch_unaligned_store() { ret void } +; CHECK-LABEL: flat_scratch_multidword_load: +; HSA: flat_load_dword +; HSA: flat_load_dword +; FIXME: These tests are broken for os = mesa3d, becasue it doesn't initialize flat_scr +define void @flat_scratch_multidword_load() { + %scratch = alloca <2 x i32> + %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)* + %ld = load volatile <2 x i32>, <2 x i32> addrspace(4)* %fptr + ret void +} + +; CHECK-LABEL: flat_scratch_multidword_store: +; HSA: flat_store_dword +; HSA: flat_store_dword +; FIXME: These tests are broken for os = mesa3d, becasue it doesn't initialize flat_scr +define void @flat_scratch_multidword_store() { + %scratch = alloca <2 x i32> + %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)* + store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(4)* %fptr + ret void +} + attributes #0 = { nounwind } attributes #1 = { nounwind convergent } attributes #3 = { nounwind readnone } |

