summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll374
1 files changed, 175 insertions, 199 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
index bfbe1c00a57..8da4d80806d 100644
--- a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
@@ -15,7 +15,7 @@ define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(
ret void
}
-; FUNC-LABEL: {{^}}unaligned_load_store_i16_global:
+; FUNC-LABEL: {{^}}global_unaligned_load_store_i16:
; GCN-NOHSA: buffer_load_ubyte
; GCN-NOHSA: buffer_load_ubyte
; GCN-NOHSA: buffer_store_byte
@@ -25,22 +25,25 @@ define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(
; GCN-HSA: flat_load_ubyte
; GCN-HSA: flat_store_byte
; GCN-HSA: flat_store_byte
-define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 {
+define void @global_unaligned_load_store_i16(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 {
%v = load i16, i16 addrspace(1)* %p, align 1
store i16 %v, i16 addrspace(1)* %r, align 1
ret void
}
; FUNC-LABEL: {{^}}local_unaligned_load_store_i32:
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_write_b8
-; GCN: ds_write_b8
-; GCN: ds_write_b8
-; GCN: ds_write_b8
-; GCN: s_endpgm
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI-NOT: v_or
+; SI-NOT: v_lshl
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: s_endpgm
define void @local_unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
%v = load i32, i32 addrspace(3)* %p, align 1
store i32 %v, i32 addrspace(3)* %r, align 1
@@ -98,141 +101,149 @@ define void @local_align2_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)*
ret void
}
-; FIXME: Unnecessary packing and unpacking of bytes.
; FUNC-LABEL: {{^}}local_unaligned_load_store_i64:
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-; GCN: ds_write_b8
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-
-; GCN: ds_write_b8
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-
-; GCN: ds_write_b8
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-
-; GCN: ds_write_b8
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-
-; GCN: ds_write_b8
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-
-; GCN: ds_write_b8
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-
-; GCN: ds_write_b8
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-; GCN: ds_write_b8
-; GCN: s_endpgm
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+; SI: ds_write_b8
+; SI: s_endpgm
define void @local_unaligned_load_store_i64(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
%v = load i64, i64 addrspace(3)* %p, align 1
store i64 %v, i64 addrspace(3)* %r, align 1
ret void
}
-; FUNC-LABEL: {{^}}local_unaligned_load_store_v2i32:
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-; GCN: ds_read_u8
-
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-; GCN: ds_write_b8
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-
-; GCN: ds_write_b8
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-
-; GCN: ds_write_b8
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-
-; GCN: ds_write_b8
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-
-; GCN: ds_write_b8
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-
-; GCN: ds_write_b8
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-
-; GCN: ds_write_b8
-; XGCN-NOT: v_or_b32
-; XGCN-NOT: v_lshl
-; GCN: ds_write_b8
-; GCN: s_endpgm
+; SI-LABEL: {{^}}local_unaligned_load_store_v2i32:
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+
+; SI: ds_write_b8
+; SI-NOT: v_or_b32
+; SI-NOT: v_lshl
+; SI: ds_write_b8
+; SI: s_endpgm
define void @local_unaligned_load_store_v2i32(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) {
%v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1
ret void
}
-; FUNC-LABEL: {{^}}unaligned_load_store_i64_global:
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
+; SI-LABEL: {{^}}global_align2_load_store_i64:
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
+; SI-NOT: v_or_
+; SI-NOT: v_lshl
-; XGCN-NOT: v_or_
-; XGCN-NOT: v_lshl
+; SI: buffer_load_ushort
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
+; SI-NOT: v_or_
+; SI-NOT: v_lshl
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
+; SI: buffer_load_ushort
+
+; SI-NOT: v_or_
+; SI-NOT: v_lshl
+
+; SI: buffer_store_short
+; SI: buffer_store_short
+; SI: buffer_store_short
+; SI: buffer_store_short
+define void @global_align2_load_store_i64(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
+ %v = load i64, i64 addrspace(1)* %p, align 2
+ store i64 %v, i64 addrspace(1)* %r, align 2
+ ret void
+}
+
+; SI-LABEL: {{^}}unaligned_load_store_i64_global:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+
+; SI-NOT: v_or_
+; SI-NOT: v_lshl
+
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
%v = load i64, i64 addrspace(1)* %p, align 1
store i64 %v, i64 addrspace(1)* %r, align 1
ret void
@@ -285,76 +296,41 @@ define void @local_unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i
ret void
}
-; FUNC-LABEL: {{^}}global_unaligned_load_store_v4i32:
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-; GCN-NOHSA: buffer_load_ubyte
-
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-; GCN-NOHSA: buffer_store_byte
-
-
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-; GCN-HSA: flat_load_ubyte
-
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-; GCN-HSA: flat_store_byte
-define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) #0 {
+; SI-LABEL: {{^}}global_unaligned_load_store_v4i32
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind {
%v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
ret void
OpenPOWER on IntegriCloud