AMDGPU: Split MUBUF offset into aligned components

Summary:
Atomic buffer operations do not work (and trap on gfx9) when the individual offset components are unaligned, even if their sum is aligned.

Previously, we generated an offset of 4156 without an SGPR by splitting it as 4095 + 61 (immediate + inline constant). The highest offset for which we can do this correctly is 4156 = 4092 + 64.

Fixes dEQP-GLES31.functional.ssbo.atomic.*

Reviewers: arsenm

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye

Differential Revision: https://reviews.llvm.org/D37850

llvm-svn: 315302
author: Nicolai Haehnle <nhaehnle@gmail.com> 2017-10-10 12:22:23 +0000
committer: Nicolai Haehnle <nhaehnle@gmail.com> 2017-10-10 12:22:23 +0000
commit: 312b64f4d703947779ce56abdcc0d59741ff99df (patch)
tree: 9eb7bccd2f72bdf3326848767801ab20070931af /llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
parent: 0f22a06b4de75d4d02741b9f39d39df1cd4e8066 (diff)
download: bcm5719-llvm-312b64f4d703947779ce56abdcc0d59741ff99df.tar.gz
bcm5719-llvm-312b64f4d703947779ce56abdcc0d59741ff99df.zip
1 files changed, 16 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 7faf3e123f8..5bd1092196a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1235,24 +1235,30 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                              SDValue &SOffset,
                                              SDValue &ImmOffset) const {
   SDLoc DL(Constant);
+  const uint32_t Align = 4;
+  const uint32_t MaxImm = alignDown(4095, Align);
   uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
   uint32_t Overflow = 0;
 
-  if (Imm >= 4096) {
-    if (Imm <= 4095 + 64) {
-      // Use an SOffset inline constant for 1..64
-      Overflow = Imm - 4095;
-      Imm = 4095;
+  if (Imm > MaxImm) {
+    if (Imm <= MaxImm + 64) {
+      // Use an SOffset inline constant for 4..64
+      Overflow = Imm - MaxImm;
+      Imm = MaxImm;
     } else {
       // Try to keep the same value in SOffset for adjacent loads, so that
       // the corresponding register contents can be re-used.
       //
-      // Load values with all low-bits set into SOffset, so that a larger
-      // range of values can be covered using s_movk_i32
-      uint32_t High = (Imm + 1) & ~4095;
-      uint32_t Low = (Imm + 1) & 4095;
+      // Load values with all low-bits (except for alignment bits) set into
+      // SOffset, so that a larger range of values can be covered using
+      // s_movk_i32.
+      //
+      // Atomic operations fail to work correctly when individual address
+      // components are unaligned, even if their sum is aligned.
+      uint32_t High = (Imm + Align) & ~4095;
+      uint32_t Low = (Imm + Align) & 4095;
       Imm = Low;
-      Overflow = High - 1;
+      Overflow = High - Align;
     }
   }
author	Nicolai Haehnle <nhaehnle@gmail.com>	2017-10-10 12:22:23 +0000
committer	Nicolai Haehnle <nhaehnle@gmail.com>	2017-10-10 12:22:23 +0000
commit	312b64f4d703947779ce56abdcc0d59741ff99df (patch)
tree	9eb7bccd2f72bdf3326848767801ab20070931af /llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
parent	0f22a06b4de75d4d02741b9f39d39df1cd4e8066 (diff)
download	bcm5719-llvm-312b64f4d703947779ce56abdcc0d59741ff99df.tar.gz bcm5719-llvm-312b64f4d703947779ce56abdcc0d59741ff99df.zip