Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll | 62
1 file changed, 31 insertions(+), 31 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
index 0f76a54975e..68aacd084bf 100644
--- a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
@@ -8,7 +8,7 @@
 ; SI: ds_write_b8
 ; SI: ds_write_b8
 ; SI: s_endpgm
-define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(3)* %r) #0 {
+define amdgpu_kernel void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(3)* %r) #0 {
   %v = load i16, i16 addrspace(3)* %p, align 1
   store i16 %v, i16 addrspace(3)* %r, align 1
   ret void
@@ -23,7 +23,7 @@ define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(
 ; UNALIGNED: buffer_load_ushort
 ; UNALIGNED: buffer_store_short
 ; SI: s_endpgm
-define void @global_unaligned_load_store_i16(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 {
+define amdgpu_kernel void @global_unaligned_load_store_i16(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 {
   %v = load i16, i16 addrspace(1)* %p, align 1
   store i16 %v, i16 addrspace(1)* %r, align 1
   ret void
@@ -42,7 +42,7 @@ define void @global_unaligned_load_store_i16(i16 addrspace(1)* %p, i16 addrspace
 ; SI: ds_write_b8
 ; SI: ds_write_b8
 ; SI: s_endpgm
-define void @local_unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
+define amdgpu_kernel void @local_unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
   %v = load i32, i32 addrspace(3)* %p, align 1
   store i32 %v, i32 addrspace(3)* %r, align 1
   ret void
@@ -60,7 +60,7 @@ define void @local_unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(

 ; UNALIGNED: buffer_load_dword
 ; UNALIGNED: buffer_store_dword
-define void @global_unaligned_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
+define amdgpu_kernel void @global_unaligned_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
   %v = load i32, i32 addrspace(1)* %p, align 1
   store i32 %v, i32 addrspace(1)* %r, align 1
   ret void
@@ -74,7 +74,7 @@ define void @global_unaligned_load_store_i32(i32 addrspace(1)* %p, i32 addrspace

 ; UNALIGNED: buffer_load_dword
 ; UNALIGNED: buffer_store_dword
-define void @global_align2_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
+define amdgpu_kernel void @global_align2_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
   %v = load i32, i32 addrspace(1)* %p, align 2
   store i32 %v, i32 addrspace(1)* %r, align 2
   ret void
@@ -85,7 +85,7 @@ define void @global_align2_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)
 ; GCN: ds_read_u16
 ; GCN: ds_write_b16
 ; GCN: ds_write_b16
-define void @local_align2_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
+define amdgpu_kernel void @local_align2_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
   %v = load i32, i32 addrspace(3)* %p, align 2
   store i32 %v, i32 addrspace(3)* %r, align 2
   ret void
@@ -132,7 +132,7 @@ define void @local_align2_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)*
 ; SI-NOT: v_lshl
 ; SI: ds_write_b8
 ; SI: s_endpgm
-define void @local_unaligned_load_store_i64(i64 addrspace(3)* %p, i64 addrspace(3)* %r) #0 {
+define amdgpu_kernel void @local_unaligned_load_store_i64(i64 addrspace(3)* %p, i64 addrspace(3)* %r) #0 {
   %v = load i64, i64 addrspace(3)* %p, align 1
   store i64 %v, i64 addrspace(3)* %r, align 1
   ret void
@@ -179,7 +179,7 @@ define void @local_unaligned_load_store_i64(i64 addrspace(3)* %p, i64 addrspace(
 ; SI-NOT: v_lshl
 ; SI: ds_write_b8
 ; SI: s_endpgm
-define void @local_unaligned_load_store_v2i32(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) #0 {
+define amdgpu_kernel void @local_unaligned_load_store_v2i32(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) #0 {
   %v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
   store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1
   ret void
@@ -209,7 +209,7 @@ define void @local_unaligned_load_store_v2i32(<2 x i32> addrspace(3)* %p, <2 x i

 ; UNALIGNED: buffer_load_dwordx2
 ; UNALIGNED: buffer_store_dwordx2
-define void @global_align2_load_store_i64(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
+define amdgpu_kernel void @global_align2_load_store_i64(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
   %v = load i64, i64 addrspace(1)* %p, align 2
   store i64 %v, i64 addrspace(1)* %r, align 2
   ret void
@@ -239,7 +239,7 @@ define void @global_align2_load_store_i64(i64 addrspace(1)* %p, i64 addrspace(1)

 ; UNALIGNED: buffer_load_dwordx2
 ; UNALIGNED: buffer_store_dwordx2
-define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
+define amdgpu_kernel void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
   %v = load i64, i64 addrspace(1)* %p, align 1
   store i64 %v, i64 addrspace(1)* %r, align 1
   ret void
@@ -286,7 +286,7 @@ define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace
 ; GCN: ds_write_b8
 ; GCN: ds_write_b8
 ; GCN: s_endpgm
-define void @local_unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) #0 {
+define amdgpu_kernel void @local_unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) #0 {
   %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1
   store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
   ret void
@@ -329,7 +329,7 @@ define void @local_unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i

 ; UNALIGNED: buffer_load_dwordx4
 ; UNALIGNED: buffer_store_dwordx4
-define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) #0 {
+define amdgpu_kernel void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) #0 {
   %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
   store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
   ret void
@@ -337,7 +337,7 @@ define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x

 ; FUNC-LABEL: {{^}}local_load_i64_align_4:
 ; GCN: ds_read2_b32
-define void @local_load_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
   %val = load i64, i64 addrspace(3)* %in, align 4
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
@@ -345,7 +345,7 @@ define void @local_load_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrsp

 ; FUNC-LABEL: {{^}}local_load_i64_align_4_with_offset
 ; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
-define void @local_load_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
   %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
   %val = load i64, i64 addrspace(3)* %ptr, align 4
   store i64 %val, i64 addrspace(1)* %out, align 8
@@ -356,7 +356,7 @@ define void @local_load_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out
 ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
 ; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
 ; GCN: s_endpgm
-define void @local_load_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
   %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
   %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
   %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
@@ -375,7 +375,7 @@ define void @local_load_i64_align_4_with_split_offset(i64 addrspace(1)* nocaptur
 ; GCN: ds_read_u8
 ; GCN: ds_read_u8
 ; GCN: store_dwordx2
-define void @local_load_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
   %val = load i64, i64 addrspace(3)* %in, align 1
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
@@ -383,7 +383,7 @@ define void @local_load_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrsp

 ; FUNC-LABEL: {{^}}local_store_i64_align_4:
 ; GCN: ds_write2_b32
-define void @local_store_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
+define amdgpu_kernel void @local_store_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
   store i64 %val, i64 addrspace(3)* %out, align 4
   ret void
 }
@@ -391,7 +391,7 @@ define void @local_store_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
 ; FUNC-LABEL: {{^}}local_store_i64_align_4_with_offset
 ; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
 ; GCN: s_endpgm
-define void @local_store_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
+define amdgpu_kernel void @local_store_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
   %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4
   store i64 0, i64 addrspace(3)* %ptr, align 4
   ret void
@@ -401,7 +401,7 @@ define void @local_store_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
 ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
 ; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
 ; GCN: s_endpgm
-define void @local_store_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
+define amdgpu_kernel void @local_store_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
   %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
   %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
   %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
@@ -418,7 +418,7 @@ define void @local_store_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #
 ; UNALIGNED: s_load_dword

 ; SI: buffer_store_dword
-define void @constant_unaligned_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
+define amdgpu_kernel void @constant_unaligned_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
   %v = load i32, i32 addrspace(2)* %p, align 1
   store i32 %v, i32 addrspace(1)* %r, align 4
   ret void
@@ -430,7 +430,7 @@ define void @constant_unaligned_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)*

 ; UNALIGNED: s_load_dword
 ; UNALIGNED: buffer_store_dword
-define void @constant_align2_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
+define amdgpu_kernel void @constant_align2_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
   %v = load i32, i32 addrspace(2)* %p, align 2
   store i32 %v, i32 addrspace(1)* %r, align 4
   ret void
@@ -444,7 +444,7 @@ define void @constant_align2_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r

 ; UNALIGNED: s_load_dwordx2
 ; UNALIGNED: buffer_store_dwordx2
-define void @constant_align2_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
+define amdgpu_kernel void @constant_align2_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
   %v = load i64, i64 addrspace(2)* %p, align 2
   store i64 %v, i64 addrspace(1)* %r, align 4
   ret void
@@ -453,7 +453,7 @@ define void @constant_align2_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r
 ; SI-LABEL: {{^}}constant_align4_load_i64:
 ; SI: s_load_dwordx2
 ; SI: buffer_store_dwordx2
-define void @constant_align4_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
+define amdgpu_kernel void @constant_align4_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
   %v = load i64, i64 addrspace(2)* %p, align 4
   store i64 %v, i64 addrspace(1)* %r, align 4
   ret void
@@ -462,7 +462,7 @@ define void @constant_align4_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r
 ; SI-LABEL: {{^}}constant_align4_load_v4i32:
 ; SI: s_load_dwordx4
 ; SI: buffer_store_dwordx4
-define void @constant_align4_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
+define amdgpu_kernel void @constant_align4_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
   %v = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 4
   store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 4
   ret void
@@ -482,7 +482,7 @@ define void @constant_align4_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> ad
 ; UNALIGNED: buffer_load_dwordx2

 ; SI: buffer_store_dwordx2
-define void @constant_unaligned_load_v2i32(<2 x i32> addrspace(2)* %p, <2 x i32> addrspace(1)* %r) #0 {
+define amdgpu_kernel void @constant_unaligned_load_v2i32(<2 x i32> addrspace(2)* %p, <2 x i32> addrspace(1)* %r) #0 {
   %v = load <2 x i32>, <2 x i32> addrspace(2)* %p, align 1
   store <2 x i32> %v, <2 x i32> addrspace(1)* %r, align 4
   ret void
@@ -512,7 +512,7 @@ define void @constant_unaligned_load_v2i32(<2 x i32> addrspace(2)* %p, <2 x i32>
 ; UNALIGNED: buffer_load_dwordx4

 ; SI: buffer_store_dwordx4
-define void @constant_unaligned_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
+define amdgpu_kernel void @constant_unaligned_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
   %v = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 1
   store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 4
   ret void
@@ -521,7 +521,7 @@ define void @constant_unaligned_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32>
 ; SI-LABEL: {{^}}constant_align4_load_i8:
 ; SI: buffer_load_ubyte
 ; SI: buffer_store_byte
-define void @constant_align4_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
+define amdgpu_kernel void @constant_align4_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
   %v = load i8, i8 addrspace(2)* %p, align 4
   store i8 %v, i8 addrspace(1)* %r, align 4
   ret void
@@ -530,7 +530,7 @@ define void @constant_align4_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #
 ; SI-LABEL: {{^}}constant_align2_load_i8:
 ; SI: buffer_load_ubyte
 ; SI: buffer_store_byte
-define void @constant_align2_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
+define amdgpu_kernel void @constant_align2_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
   %v = load i8, i8 addrspace(2)* %p, align 2
   store i8 %v, i8 addrspace(1)* %r, align 2
   ret void
@@ -541,7 +541,7 @@ define void @constant_align2_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #
 ; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[LO]]
 ; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HI]]
 ; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
-define void @constant_align4_merge_load_2_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
+define amdgpu_kernel void @constant_align4_merge_load_2_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
   %gep0 = getelementptr i32, i32 addrspace(2)* %p, i64 1
   %v0 = load i32, i32 addrspace(2)* %p, align 4
   %v1 = load i32, i32 addrspace(2)* %gep0, align 4
@@ -571,7 +571,7 @@ define void @constant_align4_merge_load_2_i32(i32 addrspace(2)* %p, i32 addrspac
 ; SI: ds_read_u8

 ; SI: ScratchSize: 0{{$}}
-define void @local_load_align1_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(3)* %in) #0 {
+define amdgpu_kernel void @local_load_align1_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(3)* %in) #0 {
   %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in, align 1
   store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
   ret void
@@ -596,7 +596,7 @@ define void @local_load_align1_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> add
 ; SI: ds_write_b8

 ; SI: ScratchSize: 0{{$}}
-define void @local_store_align1_v16i8(<16 x i8> addrspace(3)* %out) #0 {
+define amdgpu_kernel void @local_store_align1_v16i8(<16 x i8> addrspace(3)* %out) #0 {
   store <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* %out, align 1
   ret void
 }