diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/addrspacecast.ll | 231 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll | 57 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll | 8 |
3 files changed, 274 insertions, 22 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll index bfe1e7c5675..c98e4acb30a 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -1,18 +1,208 @@ -; RUN: not llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA %s -; ERROR: addrspacecast not implemented +; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast: +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_dispatch_ptr = 0 +; HSA: enable_sgpr_queue_ptr = 1 -; XUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s -; XUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s -; XUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s -; XUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s +; HSA-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}} +; HSA-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}} + +; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]] +; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] + +; HSA-DAG: v_cmp_ne_i32_e64 vcc, -1, [[PTR]] +; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]] +; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] +; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 + +; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]] +define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 { + %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* + store volatile i32 7, i32 addrspace(4)* %stof + ret void +} + +; HSA-LABEL: 
{{^}}use_private_to_flat_addrspacecast: +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_dispatch_ptr = 0 +; HSA: enable_sgpr_queue_ptr = 1 + +; HSA-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}} +; HSA-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}} + +; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]] +; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] + +; HSA-DAG: v_cmp_ne_i32_e64 vcc, -1, [[PTR]] +; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]] +; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] +; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 + +; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]] +define void @use_private_to_flat_addrspacecast(i32* %ptr) #0 { + %stof = addrspacecast i32* %ptr to i32 addrspace(4)* + store volatile i32 7, i32 addrspace(4)* %stof + ret void +} + +; no-op +; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast: +; HSA: enable_sgpr_queue_ptr = 0 + +; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}} +; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]] +; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]] +; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 +; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]] +define void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 { + %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)* + store volatile i32 7, i32 addrspace(4)* %stof + ret void +} + +; no-op +; HSA-LABEL: {{^}}use_constant_to_flat_addrspacecast: +; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}} +; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]] +; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]] +; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}} +define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 { + %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)* + %ld = load volatile i32, i32 addrspace(4)* %stof + ret void +} + +; 
HSA-LABEL: {{^}}use_flat_to_group_addrspacecast: +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_dispatch_ptr = 0 +; HSA: enable_sgpr_queue_ptr = 0 + +; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}} +; HSA-DAG: v_cmp_ne_i64_e64 vcc, 0, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}} +; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]] +; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]] +; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}} +; HSA: ds_write_b32 [[CASTPTR]], v[[K]] +define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 { + %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)* + store volatile i32 0, i32 addrspace(3)* %ftos + ret void +} + +; HSA-LABEL: {{^}}use_flat_to_private_addrspacecast: +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_dispatch_ptr = 0 +; HSA: enable_sgpr_queue_ptr = 0 + +; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}} +; HSA-DAG: v_cmp_ne_i64_e64 vcc, 0, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}} +; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]] +; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]] +; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}} +; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} +define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #0 { + %ftos = addrspacecast i32 addrspace(4)* %ptr to i32* + store volatile i32 0, i32* %ftos + ret void +} + +; HSA-LABEL: {{^}}use_flat_to_global_addrspacecast: +; HSA: enable_sgpr_queue_ptr = 0 + +; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0 +; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]] +; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]] +; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0 +; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]] +define void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 { + %ftos = 
addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)* + store volatile i32 0, i32 addrspace(1)* %ftos + ret void +} + +; HSA-LABEL: {{^}}use_flat_to_constant_addrspacecast: +; HSA: enable_sgpr_queue_ptr = 0 + +; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0 +; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0 +define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 { + %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)* + load volatile i32, i32 addrspace(2)* %ftos + ret void +} + +; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast: +; HSA: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10 +; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]] +; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} +; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} +; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] +define void @cast_0_group_to_flat_addrspacecast() #0 { + %cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)* + store i32 7, i32 addrspace(4)* %cast + ret void +} + +; HSA-LABEL: {{^}}cast_0_flat_to_group_addrspacecast: +; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}} +; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} +; HSA: ds_write_b32 [[PTR]], [[K]] +define void @cast_0_flat_to_group_addrspacecast() #0 { + %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(3)* + store i32 7, i32 addrspace(3)* %cast + ret void +} + +; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast: +; HSA: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} +; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} +; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} +; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] +define void @cast_neg1_group_to_flat_addrspacecast() #0 { + %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)* + store i32 7, i32 addrspace(4)* %cast + ret void +} + +; HSA-LABEL: {{^}}cast_neg1_flat_to_group_addrspacecast: +; HSA-DAG: v_mov_b32_e32 
[[PTR:v[0-9]+]], -1{{$}} +; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} +; HSA: ds_write_b32 [[PTR]], [[K]] +define void @cast_neg1_flat_to_group_addrspacecast() #0 { + %cast = addrspacecast i32 addrspace(4)* inttoptr (i64 -1 to i32 addrspace(4)*) to i32 addrspace(3)* + store i32 7, i32 addrspace(3)* %cast + ret void +} + +; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast: +; HSA: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11 +; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]] +; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} +; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} +; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] +define void @cast_0_private_to_flat_addrspacecast() #0 { + %cast = addrspacecast i32* null to i32 addrspace(4)* + store i32 7, i32 addrspace(4)* %cast + ret void +} + +; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast: +; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}} +; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} +; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen +define void @cast_0_flat_to_private_addrspacecast() #0 { + %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(0)* + store i32 7, i32* %cast + ret void +} ; Disable optimizations in case there are optimizations added that ; specialize away generic pointer accesses. -; CHECK-LABEL: {{^}}branch_use_flat_i32: -; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} -; CHECK: s_endpgm +; HSA-LABEL: {{^}}branch_use_flat_i32: +; HSA: flat_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} +; HSA: s_endpgm define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 { entry: %cmp = icmp ne i32 %c, 0 @@ -34,20 +224,17 @@ end: ret void } -; TODO: This should not be zero when registers are used for small -; scratch allocations again. - ; Check for prologue initializing special SGPRs pointing to scratch. 
-; CHECK-LABEL: {{^}}store_flat_scratch: -; CHECK: s_movk_i32 flat_scratch_lo, 0 -; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}} -; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}} -; CHECK: flat_store_dword -; CHECK: s_barrier -; CHECK: flat_load_dword +; HSA-LABEL: {{^}}store_flat_scratch: +; HSA: s_mov_b32 flat_scratch_lo, s9 +; HSA: s_add_u32 [[ADD:s[0-9]+]], s8, s11 +; HSA: s_lshr_b32 flat_scratch_hi, [[ADD]], 8 +; HSA: flat_store_dword +; HSA: s_barrier +; HSA: flat_load_dword define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 { %alloca = alloca i32, i32 9, align 4 - %x = call i32 @llvm.amdgcn.workitem.id.x() #3 + %x = call i32 @llvm.amdgcn.workitem.id.x() #2 %pptr = getelementptr i32, i32* %alloca, i32 %x %fptr = addrspacecast i32* %pptr to i32 addrspace(4)* store i32 %x, i32 addrspace(4)* %fptr @@ -59,8 +246,8 @@ define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 { } declare void @llvm.amdgcn.s.barrier() #1 -declare i32 @llvm.amdgcn.workitem.id.x() #3 +declare i32 @llvm.amdgcn.workitem.id.x() #2 attributes #0 = { nounwind } attributes #1 = { nounwind convergent } -attributes #3 = { nounwind readnone } +attributes #2 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll index 10dac31fec0..084a6933da2 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -164,6 +164,63 @@ define void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 { ret void } +; HSA: define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 { +define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 { + %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* + store volatile i32 0, i32 addrspace(4)* %stof + ret void +} + +; HSA: define void @use_private_to_flat_addrspacecast(i32* %ptr) #11 { +define void 
@use_private_to_flat_addrspacecast(i32* %ptr) #1 { + %stof = addrspacecast i32* %ptr to i32 addrspace(4)* + store volatile i32 0, i32 addrspace(4)* %stof + ret void +} + +; HSA: define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 { +define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 { + %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)* + store volatile i32 0, i32 addrspace(3)* %ftos + ret void +} + +; HSA: define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 { +define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 { + %ftos = addrspacecast i32 addrspace(4)* %ptr to i32* + store volatile i32 0, i32* %ftos + ret void +} + +; No-op addrspacecast should not use queue ptr +; HSA: define void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 { +define void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 { + %stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)* + store volatile i32 0, i32 addrspace(4)* %stof + ret void +} + +; HSA: define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 { +define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 { + %stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)* + %ld = load volatile i32, i32 addrspace(4)* %stof + ret void +} + +; HSA: define void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 { +define void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 { + %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)* + store volatile i32 0, i32 addrspace(1)* %ftos + ret void +} + +; HSA: define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 { +define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 { + %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)* + %ld = load volatile i32, i32 addrspace(2)* %ftos + ret void +} + attributes #0 = { nounwind readnone } 
attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll new file mode 100644 index 00000000000..c29434f5eca --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll @@ -0,0 +1,8 @@ +; RUN: not llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s 2>&1 | FileCheck -check-prefix=ERROR %s + +; ERROR: error: <unknown>:0:0: in function use_group_to_global_addrspacecast void (i32 addrspace(3)*): invalid addrspacecast +define void @use_group_to_global_addrspacecast(i32 addrspace(3)* %ptr) { + %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(1)* + store volatile i32 0, i32 addrspace(1)* %stof + ret void +} |