summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2018-02-08 01:56:14 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2018-02-08 01:56:14 +0000
commit: b02cebf55260ee2b0e1a50b8a5f1e0029ebffaab (patch)
tree: 0340168d699f1788ba5a1bb340542b1cc2b88799 /llvm/test/CodeGen
parent: 3a47fddfd37beb52f139a62c01622bbd97c99412 (diff)
downloadbcm5719-llvm-b02cebf55260ee2b0e1a50b8a5f1e0029ebffaab.tar.gz
bcm5719-llvm-b02cebf55260ee2b0e1a50b8a5f1e0029ebffaab.zip
AMDGPU: Fix incorrect reordering when inline asm defines LDS address
Defs of operands outside of the instruction's explicit defs need to be checked. llvm-svn: 324554
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/merge-load-store.mir | 61
1 file changed, 61 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir
index 78ed2497257..1fcbd60b688 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir
@@ -24,6 +24,41 @@
store i32 %4, i32 addrspace(3)* %ptr.0
ret void
}
+
+ @lds0 = external dso_local unnamed_addr addrspace(3) global [256 x i32], align 4 ; addrspace(3) arrays loaded from by @asm_defines_address
+ @lds1 = external dso_local unnamed_addr addrspace(3) global [256 x i32], align 4
+ @lds2 = external dso_local unnamed_addr addrspace(3) global [256 x i32], align 4
+ @lds3 = external dso_local unnamed_addr addrspace(3) global [256 x i32], align 4
+
+ define void @asm_defines_address() #0 { ; IR referenced by the memory operands of the asm_defines_address MIR function
+ bb:
+ %tmp1 = load i32, i32 addrspace(3)* getelementptr inbounds ([256 x i32], [256 x i32] addrspace(3)* @lds0, i32 0, i32 0), align 4
+ %0 = and i32 %tmp1, 255
+ %tmp3 = load i32, i32 addrspace(3)* getelementptr ([256 x i32], [256 x i32] addrspace(3)* @lds1, i32 0, i32 undef), align 4
+ %tmp6 = load i32, i32 addrspace(3)* getelementptr ([256 x i32], [256 x i32] addrspace(3)* @lds3, i32 0, i32 undef), align 4
+ %tmp7 = tail call i32 asm "v_or_b32 $0, 0, $1", "=v,v"(i32 %tmp6) #1 ; asm output; feeds the index used to form %tmp12
+ %tmp10 = lshr i32 %tmp7, 16
+ %tmp11 = and i32 %tmp10, 255
+ %tmp12 = getelementptr inbounds [256 x i32], [256 x i32] addrspace(3)* @lds1, i32 0, i32 %tmp11 ; address derived from the inline asm result %tmp7
+ %tmp13 = load i32, i32 addrspace(3)* %tmp12, align 4
+ %tmp14 = xor i32 %tmp3, %tmp13
+ %tmp15 = lshr i32 %tmp14, 8
+ %tmp16 = and i32 %tmp15, 16711680
+ %tmp19 = lshr i32 %tmp16, 16
+ %tmp20 = and i32 %tmp19, 255
+ %tmp21 = getelementptr inbounds [256 x i32], [256 x i32] addrspace(3)* @lds1, i32 0, i32 %tmp20 ; index depends on the previous load result (%tmp13)
+ %tmp22 = load i32, i32 addrspace(3)* %tmp21, align 4
+ %tmp24 = load i32, i32 addrspace(3)* getelementptr ([256 x i32], [256 x i32] addrspace(3)* @lds2, i32 0, i32 undef), align 4
+ %tmp25 = xor i32 %tmp22, %tmp24
+ %tmp26 = and i32 %tmp25, -16777216
+ %tmp28 = or i32 %0, %tmp26
+ store volatile i32 %tmp28, i32 addrspace(1)* undef
+ ret void
+ }
+
+ attributes #0 = { convergent nounwind }
+ attributes #1 = { convergent nounwind readnone } ; attribute group applied to the inline asm call
+
...
---
name: mem_dependency
@@ -68,3 +103,29 @@ body: |
S_ENDPGM
...
+---
+# Make sure the inline asm that defines %0 is not moved below the
+# DS_READ_B32 that uses %0 for its address.
+# CHECK-LABEL: name: asm_defines_address
+# CHECK: DS_READ2ST64_B32
+# CHECK: DS_READ2ST64_B32
+# CHECK: INLINEASM
+# CHECK: DS_READ_B32
+# CHECK: DS_READ_B32
+name: asm_defines_address
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '' }
+body: |
+ bb.0:
+ %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %2:vgpr_32 = DS_READ_B32 %1, 3072, 0, implicit $m0, implicit $exec :: (dereferenceable load 4 from `i32 addrspace(3)* getelementptr inbounds ([256 x i32], [256 x i32] addrspace(3)* @lds0, i32 0, i32 0)`, addrspace 3)
+ %3:vgpr_32 = DS_READ_B32 %1, 2048, 0, implicit $m0, implicit $exec :: (load 4 from `i32 addrspace(3)* getelementptr ([256 x i32], [256 x i32] addrspace(3)* @lds1, i32 0, i32 undef)`, addrspace 3)
+ %4:vgpr_32 = DS_READ_B32 %1, 1024, 0, implicit $m0, implicit $exec :: (load 4 from `i32 addrspace(3)* getelementptr ([256 x i32], [256 x i32] addrspace(3)* @lds3, i32 0, i32 undef)`, addrspace 3)
+ INLINEASM &"v_or_b32 $0, 0, $1", 32, 327690, def %0, 327689, %4 ; defines %0, reads %4
+ %5:vgpr_32 = DS_READ_B32 %0, 2048, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp12, addrspace 3) ; first operand %0 is the asm def above
+ %6:vgpr_32 = DS_READ_B32 %5, 2048, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp21, addrspace 3)
+ %7:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4 from `i32 addrspace(3)* getelementptr ([256 x i32], [256 x i32] addrspace(3)* @lds2, i32 0, i32 undef)`, addrspace 3)
+ S_SETPC_B64_return undef $sgpr30_sgpr31, implicit %6, implicit %7
+
+...
OpenPOWER on IntegriCloud