author     Wei Ding <wei.ding2@amd.com>  2017-07-26 21:07:28 +0000
committer  Wei Ding <wei.ding2@amd.com>  2017-07-26 21:07:28 +0000
commit     a126a13bb3eba49860b53691324748009c87841c (patch)
tree       89c298c9d569456d998b2f52e398e3937762830d /llvm/test
parent     92d4dd0da711a977012f67718f1a3a311ad03c13 (diff)
AMDGPU : Widen extending scalar loads to 32-bits.
Differential Revision: http://reviews.llvm.org/D35146
llvm-svn: 309178
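
The transform that the new tests below check can be sketched as follows (an illustrative example, not code from the patch itself; the value names %cast, %wide, %val and %ext are made up). A sub-dword extending scalar load from the constant address space (addrspace(2)) with at least 4-byte alignment is rewritten by -amdgpu-codegenprepare into a 32-bit load followed by a trunc, which lets the backend select s_load_dword instead of a buffer byte load (see the s_load_dword check update in unaligned-load-store.ll below).

Before the pass (as written in the tests):

  %val = load i8, i8 addrspace(2)* %in, align 4
  %ext = zext i8 %val to i32
  store i32 %ext, i32 addrspace(1)* %out

After the pass (matching the OPT check lines: bitcast, load i32, trunc, zext, store):

  %cast = bitcast i8 addrspace(2)* %in to i32 addrspace(2)*
  %wide = load i32, i32 addrspace(2)* %cast, align 4
  %val = trunc i32 %wide to i8
  %ext = zext i8 %val to i32
  store i32 %ext, i32 addrspace(1)* %out

Loads that are under-aligned or volatile are left alone, as the align-less, align 2, and volatile test cases below verify.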
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll          |   2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll  | 192
2 files changed, 193 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
index 68aacd084bf..a08535fc859 100644
--- a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
@@ -519,7 +519,7 @@ define amdgpu_kernel void @constant_unaligned_load_v4i32(<4 x i32> addrspace(2)*
}
; SI-LABEL: {{^}}constant_align4_load_i8:
-; SI: buffer_load_ubyte
+; SI: s_load_dword
; SI: buffer_store_byte
define amdgpu_kernel void @constant_align4_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
%v = load i8, i8 addrspace(2)* %p, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll b/llvm/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll
new file mode 100644
index 00000000000..e200f7b64d1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll
@@ -0,0 +1,192 @@
+; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare < %s | FileCheck -check-prefix=OPT %s
+
+declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+
+; OPT-LABEL: @constant_load_i1
+; OPT: load i1
+; OPT-NEXT: store i1
+define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) #0 {
+ %val = load i1, i1 addrspace(2)* %in
+ store i1 %val, i1 addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @constant_load_i1_align2
+; OPT: load i1
+; OPT-NEXT: store
+define amdgpu_kernel void @constant_load_i1_align2(i1 addrspace(1)* %out, i1 addrspace(2)* %in) #0 {
+ %val = load i1, i1 addrspace(2)* %in, align 2
+ store i1 %val, i1 addrspace(1)* %out, align 2
+ ret void
+}
+
+; OPT-LABEL: @constant_load_i1_align4
+; OPT: bitcast
+; OPT-NEXT: load i32
+; OPT-NEXT: trunc
+; OPT-NEXT: store
+define amdgpu_kernel void @constant_load_i1_align4(i1 addrspace(1)* %out, i1 addrspace(2)* %in) #0 {
+ %val = load i1, i1 addrspace(2)* %in, align 4
+ store i1 %val, i1 addrspace(1)* %out, align 4
+ ret void
+}
+
+; OPT-LABEL: @constant_load_i8
+; OPT: load i8
+; OPT-NEXT: store
+define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+ %val = load i8, i8 addrspace(2)* %in
+ store i8 %val, i8 addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @constant_load_i8_align2
+; OPT: load i8
+; OPT-NEXT: store
+define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+ %val = load i8, i8 addrspace(2)* %in, align 2
+ store i8 %val, i8 addrspace(1)* %out, align 2
+ ret void
+}
+
+; OPT-LABEL: @constant_load_i8align4
+; OPT: bitcast
+; OPT-NEXT: load i32
+; OPT-NEXT: trunc
+; OPT-NEXT: store
+define amdgpu_kernel void @constant_load_i8align4(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+ %val = load i8, i8 addrspace(2)* %in, align 4
+ store i8 %val, i8 addrspace(1)* %out, align 4
+ ret void
+}
+
+
+; OPT-LABEL: @constant_load_v2i8
+; OPT: load <2 x i8>
+; OPT-NEXT: store
+define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+ %ld = load <2 x i8>, <2 x i8> addrspace(2)* %in
+ store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @constant_load_v2i8_align4
+; OPT: bitcast
+; OPT-NEXT: load i32
+; OPT-NEXT: trunc
+; OPT-NEXT: bitcast
+; OPT-NEXT: store
+define amdgpu_kernel void @constant_load_v2i8_align4(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+ %ld = load <2 x i8>, <2 x i8> addrspace(2)* %in, align 4
+ store <2 x i8> %ld, <2 x i8> addrspace(1)* %out, align 4
+ ret void
+}
+
+; OPT-LABEL: @constant_load_v3i8
+; OPT: bitcast <3 x i8>
+; OPT-NEXT: load i32, i32 addrspace(2)
+; OPT-NEXT: trunc i32
+; OPT-NEXT: bitcast i24
+; OPT-NEXT: store <3 x i8>
+define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
+ %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
+ store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @constant_load_v3i8_align4
+; OPT: bitcast <3 x i8>
+; OPT-NEXT: load i32, i32 addrspace(2)
+; OPT-NEXT: trunc i32
+; OPT-NEXT: bitcast i24
+; OPT-NEXT: store <3 x i8>
+define amdgpu_kernel void @constant_load_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
+ %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in, align 4
+ store <3 x i8> %ld, <3 x i8> addrspace(1)* %out, align 4
+ ret void
+}
+
+; OPT-LABEL: @constant_load_i16
+; OPT: load i16
+; OPT: sext
+; OPT-NEXT: store
+define amdgpu_kernel void @constant_load_i16(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
+ %ld = load i16, i16 addrspace(2)* %in
+ %ext = sext i16 %ld to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @constant_load_i16_align4
+; OPT: bitcast
+; OPT-NEXT: load i32
+; OPT-NEXT: trunc
+; OPT-NEXT: sext
+; OPT-NEXT: store
+define amdgpu_kernel void @constant_load_i16_align4(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
+ %ld = load i16, i16 addrspace(2)* %in, align 4
+ %ext = sext i16 %ld to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; OPT-LABEL: @constant_load_f16
+; OPT: load half
+; OPT-NEXT: store
+define amdgpu_kernel void @constant_load_f16(half addrspace(1)* %out, half addrspace(2)* %in) #0 {
+ %ld = load half, half addrspace(2)* %in
+ store half %ld, half addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @constant_load_v2f16
+; OPT: load <2 x half>
+; OPT-NEXT: store
+define amdgpu_kernel void @constant_load_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(2)* %in) #0 {
+ %ld = load <2 x half>, <2 x half> addrspace(2)* %in
+ store <2 x half> %ld, <2 x half> addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @load_volatile
+; OPT: load volatile i16
+; OPT-NEXT: store
+define amdgpu_kernel void @load_volatile(i16 addrspace(1)* %out, i16 addrspace(2)* %in) {
+ %a = load volatile i16, i16 addrspace(2)* %in
+ store i16 %a, i16 addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @constant_load_v2i8_volatile
+; OPT: load volatile <2 x i8>
+; OPT-NEXT: store
+define amdgpu_kernel void @constant_load_v2i8_volatile(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+ %ld = load volatile <2 x i8>, <2 x i8> addrspace(2)* %in
+ store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @constant_load_v2i8_addrspace1
+; OPT: load <2 x i8>
+; OPT-NEXT: store
+define amdgpu_kernel void @constant_load_v2i8_addrspace1(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+ %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in
+ store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @use_dispatch_ptr
+; OPT: bitcast
+; OPT-NEXT: load i32
+; OPT-NEXT: trunc
+; OPT-NEXT: zext
+; OPT-NEXT: store
+define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
+ %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+ %val = load i8, i8 addrspace(2)* %dispatch.ptr, align 4
+ %ld = zext i8 %val to i32
+ store i32 %ld, i32 addrspace(1)* %ptr
+ ret void
+}
+
+attributes #0 = { nounwind }