summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-06-02 19:54:26 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-06-02 19:54:26 +0000
commitd1097a38e2b754a98bd60a9581316f0ea9eae6bc (patch)
tree2e4ea47f0d21051928c068e7765aa5817605e5ce /llvm/test
parentf4e9c9ac08315bed9e5f8ffd0c1612540844370b (diff)
downloadbcm5719-llvm-d1097a38e2b754a98bd60a9581316f0ea9eae6bc.tar.gz
bcm5719-llvm-d1097a38e2b754a98bd60a9581316f0ea9eae6bc.zip
AMDGPU: Cleanup load tests
There are a lot of different kinds of loads to test for, and these were scattered around inconsistently with some redundancy. Try to comprehensively test all loads in a consistent way. llvm-svn: 271571
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/AMDGPU/extload.ll40
-rw-r--r--llvm/test/CodeGen/AMDGPU/fpext.ll12
-rw-r--r--llvm/test/CodeGen/AMDGPU/global-extload-i1.ll302
-rw-r--r--llvm/test/CodeGen/AMDGPU/global-extload-i16.ll302
-rw-r--r--llvm/test/CodeGen/AMDGPU/global-extload-i32.ll308
-rw-r--r--llvm/test/CodeGen/AMDGPU/global-extload-i8.ll299
-rw-r--r--llvm/test/CodeGen/AMDGPU/kernel-args.ll54
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-constant-f64.ll15
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-constant-i1.ll371
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-constant-i16.ll761
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-constant-i32.ll378
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-constant-i64.ll90
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-constant-i8.ll605
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-global-f32.ll93
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-global-f64.ll94
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-global-i1.ll371
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-global-i16.ll774
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-global-i32.ll523
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-global-i64.ll125
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-global-i8.ll579
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-i1.ll149
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-local-f32.ll109
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-local-f64.ll154
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-local-i1.ll371
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-local-i16.ll608
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-local-i32.ll182
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-local-i64.ll154
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-local-i8.ll562
-rw-r--r--llvm/test/CodeGen/AMDGPU/load.ll750
-rw-r--r--llvm/test/CodeGen/AMDGPU/load.vec.ll25
-rw-r--r--llvm/test/CodeGen/AMDGPU/load64.ll31
-rw-r--r--llvm/test/CodeGen/AMDGPU/local-memory.ll35
-rw-r--r--llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll641
33 files changed, 7426 insertions, 2441 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/extload.ll b/llvm/test/CodeGen/AMDGPU/extload.ll
index b8d7d56722a..2cb5cf0422d 100644
--- a/llvm/test/CodeGen/AMDGPU/extload.ll
+++ b/llvm/test/CodeGen/AMDGPU/extload.ll
@@ -1,14 +1,16 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=CI-HSA -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; FUNC-LABEL: {{^}}anyext_load_i8:
-; SI: buffer_load_dword v{{[0-9]+}}
-; SI: buffer_store_dword v{{[0-9]+}}
+; FIXME: This seems to not ever actually become an extload
+; FUNC-LABEL: {{^}}global_anyext_load_i8:
+; GCN: buffer_load_dword v{{[0-9]+}}
+; GCN: buffer_store_dword v{{[0-9]+}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]],
; EG: VTX_READ_32 [[VAL]]
-define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
+define void @global_anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
%cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)*
%load = load i32, i32 addrspace(1)* %cast
%x = bitcast i32 %load to <4 x i8>
@@ -17,13 +19,13 @@ define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspac
ret void
}
-; FUNC-LABEL: {{^}}anyext_load_i16:
-; SI: buffer_load_dword v{{[0-9]+}}
-; SI: buffer_store_dword v{{[0-9]+}}
+; FUNC-LABEL: {{^}}global_anyext_load_i16:
+; GCN: buffer_load_dword v{{[0-9]+}}
+; GCN: buffer_store_dword v{{[0-9]+}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]],
; EG: VTX_READ_32 [[VAL]]
-define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind {
+define void @global_anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind {
%cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)*
%load = load i32, i32 addrspace(1)* %cast
%x = bitcast i32 %load to <2 x i16>
@@ -32,13 +34,13 @@ define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrs
ret void
}
-; FUNC-LABEL: {{^}}anyext_load_lds_i8:
-; SI: ds_read_b32 v{{[0-9]+}}
-; SI: ds_write_b32 v{{[0-9]+}}
+; FUNC-LABEL: {{^}}local_anyext_load_i8:
+; GCN: ds_read_b32 v{{[0-9]+}}
+; GCN: ds_write_b32 v{{[0-9]+}}
; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]]
; EG: LDS_WRITE * [[VAL]]
-define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
+define void @local_anyext_load_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
%cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)*
%load = load i32, i32 addrspace(3)* %cast
%x = bitcast i32 %load to <4 x i8>
@@ -47,13 +49,13 @@ define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addr
ret void
}
-; FUNC-LABEL: {{^}}anyext_load_lds_i16:
-; SI: ds_read_b32 v{{[0-9]+}}
-; SI: ds_write_b32 v{{[0-9]+}}
+; FUNC-LABEL: {{^}}local_anyext_load_i16:
+; GCN: ds_read_b32 v{{[0-9]+}}
+; GCN: ds_write_b32 v{{[0-9]+}}
; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]]
; EG: LDS_WRITE * [[VAL]]
-define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind {
+define void @local_anyext_load_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind {
%cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)*
%load = load i32, i32 addrspace(3)* %cast
%x = bitcast i32 %load to <2 x i16>
diff --git a/llvm/test/CodeGen/AMDGPU/fpext.ll b/llvm/test/CodeGen/AMDGPU/fpext.ll
index 734a43be229..ad06bdd90a9 100644
--- a/llvm/test/CodeGen/AMDGPU/fpext.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpext.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fpext_f32_to_f64:
@@ -18,6 +18,16 @@ define void @fpext_v2f32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x float> %
ret void
}
+; FUNC-LABEL: {{^}}fpext_v3f32_to_v3f64:
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+define void @fpext_v3f32_to_v3f64(<3 x double> addrspace(1)* %out, <3 x float> %in) {
+ %result = fpext <3 x float> %in to <3 x double>
+ store <3 x double> %result, <3 x double> addrspace(1)* %out
+ ret void
+}
+
; FUNC-LABEL: {{^}}fpext_v4f32_to_v4f64:
; SI: v_cvt_f64_f32_e32
; SI: v_cvt_f64_f32_e32
diff --git a/llvm/test/CodeGen/AMDGPU/global-extload-i1.ll b/llvm/test/CodeGen/AMDGPU/global-extload-i1.ll
deleted file mode 100644
index c9d2c692e61..00000000000
--- a/llvm/test/CodeGen/AMDGPU/global-extload-i1.ll
+++ /dev/null
@@ -1,302 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; FIXME: Evergreen broken
-
-; FUNC-LABEL: {{^}}zextload_global_i1_to_i32:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %a = load i1, i1 addrspace(1)* %in
- %ext = zext i1 %a to i32
- store i32 %ext, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_i1_to_i32:
-; SI: buffer_load_ubyte
-; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %a = load i1, i1 addrspace(1)* %in
- %ext = sext i1 %a to i32
- store i32 %ext, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32:
-; SI: s_endpgm
-define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
- %ext = zext <1 x i1> %load to <1 x i32>
- store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32:
-; SI: s_endpgm
-define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
- %ext = sext <1 x i1> %load to <1 x i32>
- store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32:
-; SI: s_endpgm
-define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
- %ext = zext <2 x i1> %load to <2 x i32>
- store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32:
-; SI: s_endpgm
-define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
- %ext = sext <2 x i1> %load to <2 x i32>
- store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32:
-; SI: s_endpgm
-define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
- %ext = zext <4 x i1> %load to <4 x i32>
- store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32:
-; SI: s_endpgm
-define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
- %ext = sext <4 x i1> %load to <4 x i32>
- store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32:
-; SI: s_endpgm
-define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
- %ext = zext <8 x i1> %load to <8 x i32>
- store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32:
-; SI: s_endpgm
-define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
- %ext = sext <8 x i1> %load to <8 x i32>
- store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32:
-; SI: s_endpgm
-define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
- %ext = zext <16 x i1> %load to <16 x i32>
- store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32:
-; SI: s_endpgm
-define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
- %ext = sext <16 x i1> %load to <16 x i32>
- store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
- ret void
-}
-
-; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i32:
-; XSI: s_endpgm
-; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
-; %ext = zext <32 x i1> %load to <32 x i32>
-; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
-; ret void
-; }
-
-; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32:
-; XSI: s_endpgm
-; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
-; %ext = sext <32 x i1> %load to <32 x i32>
-; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
-; ret void
-; }
-
-; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32:
-; XSI: s_endpgm
-; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
-; %ext = zext <64 x i1> %load to <64 x i32>
-; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
-; ret void
-; }
-
-; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32:
-; XSI: s_endpgm
-; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
-; %ext = sext <64 x i1> %load to <64 x i32>
-; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
-; ret void
-; }
-
-; FUNC-LABEL: {{^}}zextload_global_i1_to_i64:
-; SI-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]],
-; SI-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
-; SI: buffer_store_dwordx2
-define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %a = load i1, i1 addrspace(1)* %in
- %ext = zext i1 %a to i64
- store i64 %ext, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_i1_to_i64:
-; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
-; SI: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
-; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
-; SI: buffer_store_dwordx2
-define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %a = load i1, i1 addrspace(1)* %in
- %ext = sext i1 %a to i64
- store i64 %ext, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64:
-; SI: s_endpgm
-define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
- %ext = zext <1 x i1> %load to <1 x i64>
- store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64:
-; SI: s_endpgm
-define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
- %ext = sext <1 x i1> %load to <1 x i64>
- store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64:
-; SI: s_endpgm
-define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
- %ext = zext <2 x i1> %load to <2 x i64>
- store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64:
-; SI: s_endpgm
-define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
- %ext = sext <2 x i1> %load to <2 x i64>
- store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64:
-; SI: s_endpgm
-define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
- %ext = zext <4 x i1> %load to <4 x i64>
- store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64:
-; SI: s_endpgm
-define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
- %ext = sext <4 x i1> %load to <4 x i64>
- store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64:
-; SI: s_endpgm
-define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
- %ext = zext <8 x i1> %load to <8 x i64>
- store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64:
-; SI: s_endpgm
-define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
- %ext = sext <8 x i1> %load to <8 x i64>
- store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64:
-; SI: s_endpgm
-define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
- %ext = zext <16 x i1> %load to <16 x i64>
- store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64:
-; SI: s_endpgm
-define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
- %ext = sext <16 x i1> %load to <16 x i64>
- store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
- ret void
-}
-
-; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64:
-; XSI: s_endpgm
-; define void @zextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
-; %ext = zext <32 x i1> %load to <32 x i64>
-; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
-; ret void
-; }
-
-; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64:
-; XSI: s_endpgm
-; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
-; %ext = sext <32 x i1> %load to <32 x i64>
-; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
-; ret void
-; }
-
-; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64:
-; XSI: s_endpgm
-; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
-; %ext = zext <64 x i1> %load to <64 x i64>
-; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
-; ret void
-; }
-
-; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64:
-; XSI: s_endpgm
-; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
-; %ext = sext <64 x i1> %load to <64 x i64>
-; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
-; ret void
-; }
diff --git a/llvm/test/CodeGen/AMDGPU/global-extload-i16.ll b/llvm/test/CodeGen/AMDGPU/global-extload-i16.ll
deleted file mode 100644
index 1fd7f6b0335..00000000000
--- a/llvm/test/CodeGen/AMDGPU/global-extload-i16.ll
+++ /dev/null
@@ -1,302 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; FIXME: cypress is broken because the bigger testcases spill and it's not implemented
-
-; FUNC-LABEL: {{^}}zextload_global_i16_to_i32:
-; SI: buffer_load_ushort
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %a = load i16, i16 addrspace(1)* %in
- %ext = zext i16 %a to i32
- store i32 %ext, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_i16_to_i32:
-; SI: buffer_load_sshort
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %a = load i16, i16 addrspace(1)* %in
- %ext = sext i16 %a to i32
- store i32 %ext, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i32:
-; SI: buffer_load_ushort
-; SI: s_endpgm
-define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
- %ext = zext <1 x i16> %load to <1 x i32>
- store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i32:
-; SI: buffer_load_sshort
-; SI: s_endpgm
-define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
- %ext = sext <1 x i16> %load to <1 x i32>
- store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32:
-; SI: s_endpgm
-define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
- %ext = zext <2 x i16> %load to <2 x i32>
- store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32:
-; SI: s_endpgm
-define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
- %ext = sext <2 x i16> %load to <2 x i32>
- store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32:
-; SI: s_endpgm
-define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
- %ext = zext <4 x i16> %load to <4 x i32>
- store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32:
-; SI: s_endpgm
-define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
- %ext = sext <4 x i16> %load to <4 x i32>
- store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32:
-; SI: s_endpgm
-define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
- %ext = zext <8 x i16> %load to <8 x i32>
- store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32:
-; SI: s_endpgm
-define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
- %ext = sext <8 x i16> %load to <8 x i32>
- store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32:
-; SI: s_endpgm
-define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
- %ext = zext <16 x i16> %load to <16 x i32>
- store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32:
-; SI: s_endpgm
-define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
- %ext = sext <16 x i16> %load to <16 x i32>
- store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32:
-; SI: s_endpgm
-define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
- %ext = zext <32 x i16> %load to <32 x i32>
- store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32:
-; SI: s_endpgm
-define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
- %ext = sext <32 x i16> %load to <32 x i32>
- store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32:
-; SI: s_endpgm
-define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
- %ext = zext <64 x i16> %load to <64 x i32>
- store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32:
-; SI: s_endpgm
-define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
- %ext = sext <64 x i16> %load to <64 x i32>
- store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
-; SI-DAG: buffer_load_ushort v[[LO:[0-9]+]],
-; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
-define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %a = load i16, i16 addrspace(1)* %in
- %ext = zext i16 %a to i64
- store i64 %ext, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_i16_to_i64:
-; SI: buffer_load_sshort [[LOAD:v[0-9]+]],
-; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: buffer_store_dwordx2
-define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %a = load i16, i16 addrspace(1)* %in
- %ext = sext i16 %a to i64
- store i64 %ext, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64:
-; SI: s_endpgm
-define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
- %ext = zext <1 x i16> %load to <1 x i64>
- store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64:
-; SI: s_endpgm
-define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
- %ext = sext <1 x i16> %load to <1 x i64>
- store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64:
-; SI: s_endpgm
-define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
- %ext = zext <2 x i16> %load to <2 x i64>
- store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64:
-; SI: s_endpgm
-define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
- %ext = sext <2 x i16> %load to <2 x i64>
- store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64:
-; SI: s_endpgm
-define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
- %ext = zext <4 x i16> %load to <4 x i64>
- store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64:
-; SI: s_endpgm
-define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
- %ext = sext <4 x i16> %load to <4 x i64>
- store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64:
-; SI: s_endpgm
-define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
- %ext = zext <8 x i16> %load to <8 x i64>
- store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64:
-; SI: s_endpgm
-define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
- %ext = sext <8 x i16> %load to <8 x i64>
- store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64:
-; SI: s_endpgm
-define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
- %ext = zext <16 x i16> %load to <16 x i64>
- store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64:
-; SI: s_endpgm
-define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
- %ext = sext <16 x i16> %load to <16 x i64>
- store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64:
-; SI: s_endpgm
-define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
- %ext = zext <32 x i16> %load to <32 x i64>
- store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64:
-; SI: s_endpgm
-define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
- %ext = sext <32 x i16> %load to <32 x i64>
- store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64:
-; SI: s_endpgm
-define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
- %ext = zext <64 x i16> %load to <64 x i64>
- store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64:
-; SI: s_endpgm
-define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
- %ext = sext <64 x i16> %load to <64 x i64>
- store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/global-extload-i32.ll b/llvm/test/CodeGen/AMDGPU/global-extload-i32.ll
deleted file mode 100644
index 01dfe6f72b9..00000000000
--- a/llvm/test/CodeGen/AMDGPU/global-extload-i32.ll
+++ /dev/null
@@ -1,308 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
-; SI-DAG: buffer_load_dword v[[LO:[0-9]+]],
-; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
-define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %a = load i32, i32 addrspace(1)* %in
- %ext = zext i32 %a to i64
- store i64 %ext, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
-; SI: buffer_load_dword [[LOAD:v[0-9]+]],
-; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: buffer_store_dwordx2
-define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %a = load i32, i32 addrspace(1)* %in
- %ext = sext i32 %a to i64
- store i64 %ext, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64:
-; SI: buffer_load_dword
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
- %ext = zext <1 x i32> %load to <1 x i64>
- store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64:
-; SI: buffer_load_dword
-; SI: v_ashrrev_i32
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
- %ext = sext <1 x i32> %load to <1 x i64>
- store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64:
-; SI: buffer_load_dwordx2
-; SI: buffer_store_dwordx4
-; SI: s_endpgm
-define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
- %ext = zext <2 x i32> %load to <2 x i64>
- store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64:
-; SI: buffer_load_dwordx2
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx4
-; SI: s_endpgm
-define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
- %ext = sext <2 x i32> %load to <2 x i64>
- store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64:
-; SI: buffer_load_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: s_endpgm
-define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
- %ext = zext <4 x i32> %load to <4 x i64>
- store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64:
-; SI: buffer_load_dwordx4
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI: s_endpgm
-define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
- %ext = sext <4 x i32> %load to <4 x i64>
- store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI: s_endpgm
-define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
- %ext = zext <8 x i32> %load to <8 x i64>
- store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI: s_endpgm
-define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
- %ext = sext <8 x i32> %load to <8 x i64>
- store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx4
-; SI: s_endpgm
-define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
- %ext = sext <16 x i32> %load to <16 x i64>
- store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: s_endpgm
-define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
- %ext = zext <16 x i32> %load to <16 x i64>
- store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-
-
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-; SI-DAG: v_ashrrev_i32
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI: s_endpgm
-define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
- %ext = sext <32 x i32> %load to <32 x i64>
- store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx4
-
-; SI: s_endpgm
-define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
- %ext = zext <32 x i32> %load to <32 x i64>
- store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/global-extload-i8.ll b/llvm/test/CodeGen/AMDGPU/global-extload-i8.ll
deleted file mode 100644
index 40bc1ddfd1a..00000000000
--- a/llvm/test/CodeGen/AMDGPU/global-extload-i8.ll
+++ /dev/null
@@ -1,299 +0,0 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}zextload_global_i8_to_i32:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %a = load i8, i8 addrspace(1)* %in
- %ext = zext i8 %a to i32
- store i32 %ext, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_i8_to_i32:
-; SI: buffer_load_sbyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %a = load i8, i8 addrspace(1)* %in
- %ext = sext i8 %a to i32
- store i32 %ext, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32:
-; SI: s_endpgm
-define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
- %ext = zext <1 x i8> %load to <1 x i32>
- store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32:
-; SI: s_endpgm
-define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
- %ext = sext <1 x i8> %load to <1 x i32>
- store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32:
-; SI: s_endpgm
-define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
- %ext = zext <2 x i8> %load to <2 x i32>
- store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32:
-; SI: s_endpgm
-define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
- %ext = sext <2 x i8> %load to <2 x i32>
- store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i32:
-; SI: s_endpgm
-define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
- %ext = zext <4 x i8> %load to <4 x i32>
- store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32:
-; SI: s_endpgm
-define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
- %ext = sext <4 x i8> %load to <4 x i32>
- store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32:
-; SI: s_endpgm
-define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
- %ext = zext <8 x i8> %load to <8 x i32>
- store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32:
-; SI: s_endpgm
-define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
- %ext = sext <8 x i8> %load to <8 x i32>
- store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32:
-; SI: s_endpgm
-define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
- %ext = zext <16 x i8> %load to <16 x i32>
- store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32:
-; SI: s_endpgm
-define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
- %ext = sext <16 x i8> %load to <16 x i32>
- store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
- ret void
-}
-
-; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32:
-; XSI: s_endpgm
-; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
-; %ext = zext <32 x i8> %load to <32 x i32>
-; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
-; ret void
-; }
-
-; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32:
-; XSI: s_endpgm
-; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
-; %ext = sext <32 x i8> %load to <32 x i32>
-; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
-; ret void
-; }
-
-; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32:
-; XSI: s_endpgm
-; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
-; %ext = zext <64 x i8> %load to <64 x i32>
-; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
-; ret void
-; }
-
-; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32:
-; XSI: s_endpgm
-; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
-; %ext = sext <64 x i8> %load to <64 x i32>
-; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
-; ret void
-; }
-
-; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:
-; SI-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
-; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
-define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %a = load i8, i8 addrspace(1)* %in
- %ext = zext i8 %a to i64
- store i64 %ext, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_i8_to_i64:
-; SI: buffer_load_sbyte [[LOAD:v[0-9]+]],
-; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: buffer_store_dwordx2
-define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %a = load i8, i8 addrspace(1)* %in
- %ext = sext i8 %a to i64
- store i64 %ext, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64:
-; SI: s_endpgm
-define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
- %ext = zext <1 x i8> %load to <1 x i64>
- store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64:
-; SI: s_endpgm
-define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
- %ext = sext <1 x i8> %load to <1 x i64>
- store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64:
-; SI: s_endpgm
-define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
- %ext = zext <2 x i8> %load to <2 x i64>
- store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i64:
-; SI: s_endpgm
-define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
- %ext = sext <2 x i8> %load to <2 x i64>
- store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64:
-; SI: s_endpgm
-define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
- %ext = zext <4 x i8> %load to <4 x i64>
- store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64:
-; SI: s_endpgm
-define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
- %ext = sext <4 x i8> %load to <4 x i64>
- store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64:
-; SI: s_endpgm
-define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
- %ext = zext <8 x i8> %load to <8 x i64>
- store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64:
-; SI: s_endpgm
-define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
- %ext = sext <8 x i8> %load to <8 x i64>
- store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64:
-; SI: s_endpgm
-define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
- %ext = zext <16 x i8> %load to <16 x i64>
- store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64:
-; SI: s_endpgm
-define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
- %ext = sext <16 x i8> %load to <16 x i64>
- store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64:
-; SI: s_endpgm
-define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
- %ext = zext <32 x i8> %load to <32 x i64>
- store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64:
-; SI: s_endpgm
-define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
- %ext = sext <32 x i8> %load to <32 x i64>
- store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
- ret void
-}
-
-; ; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64:
-; ; XSI: s_endpgm
-; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
-; %ext = zext <64 x i8> %load to <64 x i64>
-; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
-; ret void
-; }
-
-; ; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64:
-; ; XSI: s_endpgm
-; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
-; %ext = sext <64 x i8> %load to <64 x i64>
-; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
-; ret void
-; }
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index e9d98ac89e7..7567b38e0ce 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
+; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
@@ -475,3 +475,55 @@ entry:
; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
; ret void
; }
+
+; FUNC-LABEL: {{^}}i1_arg:
+; SI: buffer_load_ubyte
+; SI: v_and_b32_e32
+; SI: buffer_store_byte
+; SI: s_endpgm
+define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
+ store i1 %x, i1 addrspace(1)* %out, align 1
+ ret void
+}
+
+; FUNC-LABEL: {{^}}i1_arg_zext_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
+ %ext = zext i1 %x to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}i1_arg_zext_i64:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
+ %ext = zext i1 %x to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}i1_arg_sext_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
+ %ext = sext i1 %x to i32
+ store i32 %ext, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}i1_arg_sext_i64:
+; SI: buffer_load_ubyte
+; SI: v_bfe_i32
+; SI: v_ashrrev_i32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
+ %ext = sext i1 %x to i64
+ store i64 %ext, i64 addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-f64.ll b/llvm/test/CodeGen/AMDGPU/load-constant-f64.ll
new file mode 100644
index 00000000000..f94a3785a68
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-f64.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}constant_load_f64:
+; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
+; GCN-NOHSA: buffer_store_dwordx2
+; GCN-HSA: flat_store_dwordx2
+define void @constant_load_f64(double addrspace(1)* %out, double addrspace(2)* %in) #0 {
+ %ld = load double, double addrspace(2)* %in
+ store double %ld, double addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
new file mode 100644
index 00000000000..f15e4f484ff
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
@@ -0,0 +1,371 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}constant_load_i1:
+; GCN: buffer_load_ubyte
+; GCN: v_and_b32_e32 v{{[0-9]+}}, 1
+; GCN: buffer_store_byte
+
+; EG: VTX_READ_8
+; EG: AND_INT
+define void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
+ %load = load i1, i1 addrspace(2)* %in
+ store i1 %load, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v2i1:
+define void @constant_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+ store <2 x i1> %load, <2 x i1> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v3i1:
+define void @constant_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+ store <3 x i1> %load, <3 x i1> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v4i1:
+define void @constant_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+ store <4 x i1> %load, <4 x i1> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v8i1:
+define void @constant_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+ store <8 x i1> %load, <8 x i1> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v16i1:
+define void @constant_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+ store <16 x i1> %load, <16 x i1> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v32i1:
+define void @constant_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+ store <32 x i1> %load, <32 x i1> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v64i1:
+define void @constant_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+ store <64 x i1> %load, <64 x i1> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_i1_to_i32:
+; GCN: buffer_load_ubyte
+; GCN: buffer_store_dword
+define void @constant_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
+ %a = load i1, i1 addrspace(2)* %in
+ %ext = zext i1 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i1_to_i32:
+; GCN: buffer_load_ubyte
+; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
+; GCN: buffer_store_dword
+
+; EG: VTX_READ_8
+; EG: BFE_INT
+define void @constant_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
+ %a = load i1, i1 addrspace(2)* %in
+ %ext = sext i1 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i1_to_v1i32:
+define void @constant_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
+ %ext = zext <1 x i1> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i1_to_v1i32:
+define void @constant_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
+ %ext = sext <1 x i1> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i1_to_v2i32:
+define void @constant_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+ %ext = zext <2 x i1> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i1_to_v2i32:
+define void @constant_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+ %ext = sext <2 x i1> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v3i1_to_v3i32:
+define void @constant_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+ %ext = zext <3 x i1> %load to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v3i1_to_v3i32:
+define void @constant_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+ %ext = sext <3 x i1> %load to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i1_to_v4i32:
+define void @constant_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+ %ext = zext <4 x i1> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i1_to_v4i32:
+define void @constant_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+ %ext = sext <4 x i1> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i1_to_v8i32:
+define void @constant_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+ %ext = zext <8 x i1> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i1_to_v8i32:
+define void @constant_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+ %ext = sext <8 x i1> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i1_to_v16i32:
+define void @constant_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+ %ext = zext <16 x i1> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i1_to_v16i32:
+define void @constant_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+ %ext = sext <16 x i1> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i1_to_v32i32:
+define void @constant_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+ %ext = zext <32 x i1> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i1_to_v32i32:
+define void @constant_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+ %ext = sext <32 x i1> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v64i1_to_v64i32:
+define void @constant_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+ %ext = zext <64 x i1> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v64i1_to_v64i32:
+define void @constant_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+ %ext = sext <64 x i1> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_i1_to_i64:
+; GCN-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
+; GCN-DAG: v_and_b32_e32 {{v[0-9]+}}, 1, [[LOAD]]
+; GCN: buffer_store_dwordx2
+define void @constant_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
+ %a = load i1, i1 addrspace(2)* %in
+ %ext = zext i1 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i1_to_i64:
+; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
+; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
+; GCN: buffer_store_dwordx2
+define void @constant_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 {
+ %a = load i1, i1 addrspace(2)* %in
+ %ext = sext i1 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i1_to_v1i64:
+define void @constant_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
+ %ext = zext <1 x i1> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i1_to_v1i64:
+define void @constant_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <1 x i1>, <1 x i1> addrspace(2)* %in
+ %ext = sext <1 x i1> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i1_to_v2i64:
+define void @constant_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+ %ext = zext <2 x i1> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i1_to_v2i64:
+define void @constant_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(2)* %in
+ %ext = sext <2 x i1> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v3i1_to_v3i64:
+define void @constant_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+ %ext = zext <3 x i1> %load to <3 x i64>
+ store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v3i1_to_v3i64:
+define void @constant_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(2)* %in
+ %ext = sext <3 x i1> %load to <3 x i64>
+ store <3 x i64> %ext, <3 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i1_to_v4i64:
+define void @constant_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+ %ext = zext <4 x i1> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i1_to_v4i64:
+define void @constant_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(2)* %in
+ %ext = sext <4 x i1> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i1_to_v8i64:
+define void @constant_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+ %ext = zext <8 x i1> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i1_to_v8i64:
+define void @constant_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(2)* %in
+ %ext = sext <8 x i1> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i1_to_v16i64:
+define void @constant_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+ %ext = zext <16 x i1> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i1_to_v16i64:
+define void @constant_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(2)* %in
+ %ext = sext <16 x i1> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i1_to_v32i64:
+define void @constant_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+ %ext = zext <32 x i1> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i1_to_v32i64:
+define void @constant_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(2)* %in
+ %ext = sext <32 x i1> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v64i1_to_v64i64:
+define void @constant_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+ %ext = zext <64 x i1> %load to <64 x i64>
+ store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v64i1_to_v64i64:
+define void @constant_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(2)* %in
+ %ext = sext <64 x i1> %load to <64 x i64>
+ store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
new file mode 100644
index 00000000000..0eeaef20b5c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -0,0 +1,761 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}constant_load_i16:
+; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}}
+; GCN-HSA: flat_load_ushort
+
+; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+define void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(2)* %in) {
+entry:
+ %ld = load i16, i16 addrspace(2)* %in
+ store i16 %ld, i16 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v2i16:
+; GCN: s_load_dword s
+
+; EG: VTX_READ_32
+define void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) {
+entry:
+ %ld = load <2 x i16>, <2 x i16> addrspace(2)* %in
+ store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v3i16:
+; GCN: s_load_dwordx2 s
+
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_16
+define void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
+entry:
+ %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
+ store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v4i16:
+; GCN: s_load_dwordx2
+
+; EG: VTX_READ_64
+define void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) {
+entry:
+ %ld = load <4 x i16>, <4 x i16> addrspace(2)* %in
+ store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v8i16:
+; GCN: s_load_dwordx4
+
+; EG: VTX_READ_128
+define void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) {
+entry:
+ %ld = load <8 x i16>, <8 x i16> addrspace(2)* %in
+ store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v16i16:
+; GCN: s_load_dwordx8
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) {
+entry:
+ %ld = load <16 x i16>, <16 x i16> addrspace(2)* %in
+ store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_i16_to_i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_store_dword
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_store_dword
+
+; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+define void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
+ %a = load i16, i16 addrspace(2)* %in
+ %ext = zext i16 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i16_to_i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_store_dword
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_store_dword
+
+; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
+; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
+; EG: 16
+define void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
+ %a = load i16, i16 addrspace(2)* %in
+ %ext = sext i16 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i16_to_v1i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
+ %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
+ %ext = zext <1 x i16> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i16_to_v1i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-HSA: flat_load_sshort
+define void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
+ %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
+ %ext = sext <1 x i16> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
+ %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
+ %ext = zext <2 x i16> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+
+; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: 16
+; EG-DAG: 16
+define void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
+ %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
+ %ext = sext <2 x i16> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v3i16_to_v3i32:
+; GCN: s_load_dwordx2
+define void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
+entry:
+ %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
+ %ext = zext <3 x i16> %ld to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v3i16_to_v3i32:
+; GCN: s_load_dwordx2
+define void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
+entry:
+ %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
+ %ext = sext <3 x i16> %ld to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+define void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
+ %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
+ %ext = zext <4 x i16> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+
+; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
+; EG-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
+; EG-DAG: 16
+; EG-DAG: 16
+; EG-DAG: 16
+; EG-DAG: 16
+define void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
+ %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
+ %ext = sext <4 x i16> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
+ %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
+ %ext = zext <8 x i16> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+define void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
+ %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
+ %ext = sext <8 x i16> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i16_to_v16i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
+ %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
+ %ext = zext <16 x i16> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i16_to_v16i32:
+define void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
+ %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
+ %ext = sext <16 x i16> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
+ %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
+ %ext = zext <32 x i16> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i16_to_v32i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+define void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
+ %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
+ %ext = sext <32 x i16> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
+ %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
+ %ext = zext <64 x i16> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i32:
+define void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
+ %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
+ %ext = sext <64 x i16> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_i16_to_i64:
+; GCN-NOHSA-DAG: buffer_load_ushort v[[LO:[0-9]+]],
+; GCN-HSA-DAG: flat_load_ushort v[[LO:[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
+ %a = load i16, i16 addrspace(2)* %in
+ %ext = zext i16 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i16_to_i64:
+; GCN-NOHSA-DAG: buffer_load_sshort v[[LO:[0-9]+]],
+; GCN-HSA-DAG: flat_load_sshort v[[LO:[0-9]+]],
+; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
+ %a = load i16, i16 addrspace(2)* %in
+ %ext = sext i16 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i16_to_v1i64:
+define void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
+ %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
+ %ext = zext <1 x i16> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i16_to_v1i64:
+define void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
+ %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
+ %ext = sext <1 x i16> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i64:
+define void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
+ %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
+ %ext = zext <2 x i16> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i64:
+define void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
+ %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
+ %ext = sext <2 x i16> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i64:
+define void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
+ %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
+ %ext = zext <4 x i16> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i64:
+define void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
+ %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
+ %ext = sext <4 x i16> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i64:
+define void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
+ %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
+ %ext = zext <8 x i16> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i64:
+define void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
+ %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
+ %ext = sext <8 x i16> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i16_to_v16i64:
+define void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
+ %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
+ %ext = zext <16 x i16> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i16_to_v16i64:
+define void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
+ %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
+ %ext = sext <16 x i16> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i64:
+define void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
+ %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
+ %ext = zext <32 x i16> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i16_to_v32i64:
+define void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
+ %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
+ %ext = sext <32 x i16> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; ; XFUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i64:
+; define void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
+; %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
+; %ext = zext <64 x i16> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; ; XFUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i64:
+; define void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
+; %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
+; %ext = sext <64 x i16> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+; ret void
+; }
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll
new file mode 100644
index 00000000000..5f6ec652874
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll
@@ -0,0 +1,378 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}constant_load_i32:
+; GCN: s_load_dword s{{[0-9]+}}
+
+; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+define void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
+entry:
+ %ld = load i32, i32 addrspace(2)* %in
+ store i32 %ld, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v2i32:
+; GCN: s_load_dwordx2
+
+; EG: VTX_READ_64
+define void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
+ store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v3i32:
+; GCN: s_load_dwordx4
+
+; EG: VTX_READ_128
+define void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <3 x i32>, <3 x i32> addrspace(2)* %in
+ store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v4i32:
+; GCN: s_load_dwordx4
+
+; EG: VTX_READ_128
+define void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
+ store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v8i32:
+; GCN: s_load_dwordx8
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
+ store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v16i32:
+; GCN: s_load_dwordx16
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
+ store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_i32_to_i64:
+; GCN-DAG: s_load_dword s[[SLO:[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 v[[SHI:[0-9]+]], 0{{$}}
+; GCN: store_dwordx2
+
+; EG: MEM_RAT
+; EG: MEM_RAT
+define void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
+ %ld = load i32, i32 addrspace(2)* %in
+ %ext = zext i32 %ld to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i32_to_i64:
+; GCN: s_load_dword s[[SLO:[0-9]+]]
+; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[SLO]], 31
+; GCN: store_dwordx2
+
+; EG: MEM_RAT
+; EG: MEM_RAT
+; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
+; EG: 31
+define void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
+ %ld = load i32, i32 addrspace(2)* %in
+ %ext = sext i32 %ld to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i32_to_v1i64:
+; GCN: s_load_dword
+; GCN: store_dwordx2
+define void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
+ %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
+ %ext = zext <1 x i32> %ld to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i32_to_v1i64:
+; GCN: s_load_dword s[[LO:[0-9]+]]
+; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[LO]], 31
+; GCN: store_dwordx2
+define void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
+ %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
+ %ext = sext <1 x i32> %ld to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i32_to_v2i64:
+; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+; GCN: store_dwordx4
+define void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
+ %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
+ %ext = zext <2 x i32> %ld to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i32_to_v2i64:
+; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+
+; GCN-DAG: s_ashr_i32
+; GCN-DAG: s_ashr_i32
+
+; GCN: store_dwordx4
+define void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
+ %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
+ %ext = sext <2 x i32> %ld to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i32_to_v4i64:
+; GCN: s_load_dwordx4
+
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+define void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
+ %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
+ %ext = zext <4 x i32> %ld to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i32_to_v4i64:
+; GCN: s_load_dwordx4
+
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+define void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
+ %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
+ %ext = sext <4 x i32> %ld to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i32_to_v8i64:
+; GCN: s_load_dwordx8
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
+ %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
+ %ext = zext <8 x i32> %ld to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i32_to_v8i64:
+; GCN: s_load_dwordx8
+
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+; GCN: s_ashr_i32
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
+ %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
+ %ext = sext <8 x i32> %ld to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i32_to_v16i64:
+; GCN: s_load_dwordx16
+
+
+; GCN-DAG: s_ashr_i32
+
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+; GCN: store_dwordx4
+define void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
+ %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
+ %ext = sext <16 x i32> %ld to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i32_to_v16i64:
+; GCN: s_load_dwordx16
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+define void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
+ %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
+ %ext = zext <16 x i32> %ld to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i32_to_v32i64:
+
+; GCN: s_load_dwordx16
+; GCN: s_load_dwordx16
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+define void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
+ %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
+ %ext = sext <32 x i32> %ld to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i32_to_v32i64:
+; GCN: s_load_dwordx16
+; GCN: s_load_dwordx16
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
+ %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
+ %ext = zext <32 x i32> %ld to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i64.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i64.ll
new file mode 100644
index 00000000000..6697fb0e54f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i64.ll
@@ -0,0 +1,90 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}constant_load_i64:
+; GCN: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+; EG: VTX_READ_64
+define void @constant_load_i64(i64 addrspace(1)* %out, i64 addrspace(2)* %in) #0 {
+ %ld = load i64, i64 addrspace(2)* %in
+ store i64 %ld, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v2i64:
+; GCN: s_load_dwordx4
+
+; EG: VTX_READ_128
+define void @constant_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <2 x i64>, <2 x i64> addrspace(2)* %in
+ store <2 x i64> %ld, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v3i64:
+; GCN-DAG: s_load_dwordx4 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+; SI-DAG: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x4{{$}}
+; VI-DAG: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x10{{$}}
+
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+define void @constant_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <3 x i64>, <3 x i64> addrspace(2)* %in
+ store <3 x i64> %ld, <3 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v4i64:
+; GCN: s_load_dwordx8
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @constant_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <4 x i64>, <4 x i64> addrspace(2)* %in
+ store <4 x i64> %ld, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v8i64:
+; GCN: s_load_dwordx16
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @constant_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <8 x i64>, <8 x i64> addrspace(2)* %in
+ store <8 x i64> %ld, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v16i64:
+; GCN: s_load_dwordx16
+; GCN: s_load_dwordx16
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @constant_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <16 x i64>, <16 x i64> addrspace(2)* %in
+ store <16 x i64> %ld, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
new file mode 100644
index 00000000000..c0550c1ef38
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
@@ -0,0 +1,605 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}constant_load_i8:
+; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
+; GCN-HSA: flat_load_ubyte
+
+; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+define void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+entry:
+ %ld = load i8, i8 addrspace(2)* %in
+ store i8 %ld, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v2i8:
+; GCN-NOHSA: buffer_load_ushort v
+; GCN-HSA: flat_load_ushort v
+
+; EG: VTX_READ_16
+define void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <2 x i8>, <2 x i8> addrspace(2)* %in
+ store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v3i8:
+; GCN: s_load_dword s
+
+; EG-DAG: VTX_READ_32
+define void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
+ store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v4i8:
+; GCN: s_load_dword s
+
+; EG: VTX_READ_32
+define void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <4 x i8>, <4 x i8> addrspace(2)* %in
+ store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v8i8:
+; GCN: s_load_dwordx2
+
+; EG: VTX_READ_64
+define void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <8 x i8>, <8 x i8> addrspace(2)* %in
+ store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_v16i8:
+; GCN: s_load_dwordx4
+
+; EG: VTX_READ_128
+define void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <16 x i8>, <16 x i8> addrspace(2)* %in
+ store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_i8_to_i32:
+; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
+; GCN-HSA: flat_load_ubyte
+
+; EG: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+define void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+ %a = load i8, i8 addrspace(2)* %in
+ %ext = zext i8 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i8_to_i32:
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-HSA: flat_load_sbyte
+
+; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
+; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
+; EG: 8
+define void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+ %ld = load i8, i8 addrspace(2)* %in
+ %ext = sext i8 %ld to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i32:
+define void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+ %ext = zext <1 x i8> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i32:
+define void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+ %ext = sext <1 x i8> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+define void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+ %ext = zext <2 x i8> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i32:
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+
+; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: 8
+; EG-DAG: 8
+define void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+ %ext = sext <2 x i8> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v3i8_to_v3i32:
+; GCN: s_load_dword s
+
+; GCN-DAG: s_bfe_u32
+; GCN-DAG: s_bfe_u32
+; GCN-DAG: s_and_b32
+define void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
+ %ext = zext <3 x i8> %ld to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v3i8_to_v3i32:
+; GCN: s_load_dword s
+
+; GCN-DAG: s_bfe_i32
+; GCN-DAG: s_bfe_i32
+; GCN-DAG: s_bfe_i32
+define void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 {
+entry:
+ %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in
+ %ext = sext <3 x i8> %ld to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+define void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+ %ext = zext <4 x i8> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i32:
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+
+; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
+; EG-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
+; EG-DAG: 8
+; EG-DAG: 8
+; EG-DAG: 8
+; EG-DAG: 8
+define void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+ %ext = sext <4 x i8> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+define void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+ %ext = zext <8 x i8> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i32:
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+define void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+ %ext = sext <8 x i8> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i32:
+define void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+ %ext = zext <16 x i8> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i32:
+define void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+ %ext = sext <16 x i8> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i32:
+define void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+ %ext = zext <32 x i8> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i32:
+define void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+ %ext = sext <32 x i8> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i32:
+define void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+ %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+ %ext = zext <64 x i8> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i32:
+define void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+ %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+ %ext = sext <64 x i8> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_i8_to_i64:
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+
+; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+
+; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
+define void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+ %a = load i8, i8 addrspace(2)* %in
+ %ext = zext i8 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i8_to_i64:
+; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
+; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
+; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+ %a = load i8, i8 addrspace(2)* %in
+ %ext = sext i8 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i64:
+define void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+ %ext = zext <1 x i8> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i64:
+define void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+ %ext = sext <1 x i8> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i64:
+define void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+ %ext = zext <2 x i8> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i64:
+define void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+ %ext = sext <2 x i8> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i64:
+define void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+ %ext = zext <4 x i8> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i64:
+define void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+ %ext = sext <4 x i8> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i64:
+define void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+ %ext = zext <8 x i8> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i64:
+define void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+ %ext = sext <8 x i8> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i64:
+define void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+ %ext = zext <16 x i8> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i64:
+define void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+ %ext = sext <16 x i8> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i64:
+define void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+ %ext = zext <32 x i8> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i64:
+define void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+ %ext = sext <32 x i8> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64:
+; define void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+; %ext = zext <64 x i8> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64:
+; define void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+; %ext = sext <64 x i8> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; FUNC-LABEL: {{^}}constant_zextload_i8_to_i16:
+; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
+; GCN-NOHSA: buffer_store_short v[[VAL]]
+
+; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
+; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
+define void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+ %a = load i8, i8 addrspace(2)* %in
+ %ext = zext i8 %a to i16
+ store i16 %ext, i16 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_i8_to_i16:
+; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
+; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],
+
+; GCN-NOHSA: buffer_store_short v[[VAL]]
+; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
+define void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 {
+ %a = load i8, i8 addrspace(2)* %in
+ %ext = sext i8 %a to i16
+ store i16 %ext, i16 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i16:
+define void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+ %ext = zext <1 x i8> %load to <1 x i16>
+ store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i16:
+define void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(2)* %in
+ %ext = sext <1 x i8> %load to <1 x i16>
+ store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i16:
+define void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+ %ext = zext <2 x i8> %load to <2 x i16>
+ store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i16:
+define void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(2)* %in
+ %ext = sext <2 x i8> %load to <2 x i16>
+ store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i16:
+define void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+ %ext = zext <4 x i8> %load to <4 x i16>
+ store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i16:
+define void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(2)* %in
+ %ext = sext <4 x i8> %load to <4 x i16>
+ store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i16:
+define void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+ %ext = zext <8 x i8> %load to <8 x i16>
+ store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i16:
+define void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(2)* %in
+ %ext = sext <8 x i8> %load to <8 x i16>
+ store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i16:
+define void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+ %ext = zext <16 x i8> %load to <16 x i16>
+ store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i16:
+define void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(2)* %in
+ %ext = sext <16 x i8> %load to <16 x i16>
+ store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i16:
+define void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+ %ext = zext <32 x i8> %load to <32 x i16>
+ store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i16:
+define void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(2)* %in
+ %ext = sext <32 x i8> %load to <32 x i16>
+ store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16:
+; define void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+; %ext = zext <64 x i8> %load to <64 x i16>
+; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16:
+; define void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in
+; %ext = sext <64 x i8> %load to <64 x i16>
+; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
+; ret void
+; }
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-f32.ll b/llvm/test/CodeGen/AMDGPU/load-global-f32.ll
new file mode 100644
index 00000000000..23f4a6079e8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-global-f32.ll
@@ -0,0 +1,93 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}global_load_f32:
+; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}
+; GCN-HSA: flat_load_dword
+
+; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+define void @global_load_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+entry:
+ %tmp0 = load float, float addrspace(1)* %in
+ store float %tmp0, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v2f32:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-HSA: flat_load_dwordx2
+
+; R600: VTX_READ_64
+define void @global_load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
+entry:
+ %tmp0 = load <2 x float>, <2 x float> addrspace(1)* %in
+ store <2 x float> %tmp0, <2 x float> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v3f32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; R600: VTX_READ_128
+define void @global_load_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
+entry:
+ %tmp0 = load <3 x float>, <3 x float> addrspace(1)* %in
+ store <3 x float> %tmp0, <3 x float> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v4f32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; R600: VTX_READ_128
+define void @global_load_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
+entry:
+ %tmp0 = load <4 x float>, <4 x float> addrspace(1)* %in
+ store <4 x float> %tmp0, <4 x float> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v8f32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; R600: VTX_READ_128
+; R600: VTX_READ_128
+define void @global_load_v8f32(<8 x float> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
+entry:
+ %tmp0 = load <8 x float>, <8 x float> addrspace(1)* %in
+ store <8 x float> %tmp0, <8 x float> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v16f32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; R600: VTX_READ_128
+; R600: VTX_READ_128
+; R600: VTX_READ_128
+; R600: VTX_READ_128
+define void @global_load_v16f32(<16 x float> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
+entry:
+ %tmp0 = load <16 x float>, <16 x float> addrspace(1)* %in
+ store <16 x float> %tmp0, <16 x float> addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-f64.ll b/llvm/test/CodeGen/AMDGPU/load-global-f64.ll
new file mode 100644
index 00000000000..cd6b6f6848d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-global-f64.ll
@@ -0,0 +1,94 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}global_load_f64:
+; GCN-NOHSA: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; GCN-NOHSA: buffer_store_dwordx2 [[VAL]]
+
+; GCN-HSA: flat_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; GCN-HSA: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]
+define void @global_load_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %ld = load double, double addrspace(1)* %in
+ store double %ld, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v2i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+define void @global_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <2 x i64>, <2 x i64> addrspace(1)* %in
+ store <2 x i64> %ld, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v3f64:
+; GCN-NOHSA-DAG: buffer_load_dwordx4
+; GCN-NOHSA-DAG: buffer_load_dwordx2
+; GCN-HSA-DAG: flat_load_dwordx4
+; GCN-HSA-DAG: flat_load_dwordx2
+define void @global_load_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <3 x double>, <3 x double> addrspace(1)* %in
+ store <3 x double> %ld, <3 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v4f64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+define void @global_load_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <4 x double>, <4 x double> addrspace(1)* %in
+ store <4 x double> %ld, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v8f64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+define void @global_load_v8f64(<8 x double> addrspace(1)* %out, <8 x double> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <8 x double>, <8 x double> addrspace(1)* %in
+ store <8 x double> %ld, <8 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v16f64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+define void @global_load_v16f64(<16 x double> addrspace(1)* %out, <16 x double> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <16 x double>, <16 x double> addrspace(1)* %in
+ store <16 x double> %ld, <16 x double> addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i1.ll b/llvm/test/CodeGen/AMDGPU/load-global-i1.ll
new file mode 100644
index 00000000000..ebfec781087
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i1.ll
@@ -0,0 +1,371 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}global_load_i1:
+; GCN: buffer_load_ubyte
+; GCN: v_and_b32_e32 v{{[0-9]+}}, 1
+; GCN: buffer_store_byte
+
+; EG: VTX_READ_8
+; EG: AND_INT
+define void @global_load_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+ %load = load i1, i1 addrspace(1)* %in
+ store i1 %load, i1 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v2i1:
+define void @global_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
+ store <2 x i1> %load, <2 x i1> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v3i1:
+define void @global_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
+ store <3 x i1> %load, <3 x i1> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v4i1:
+define void @global_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
+ store <4 x i1> %load, <4 x i1> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v8i1:
+define void @global_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
+ store <8 x i1> %load, <8 x i1> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v16i1:
+define void @global_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
+ store <16 x i1> %load, <16 x i1> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v32i1:
+define void @global_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
+ store <32 x i1> %load, <32 x i1> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v64i1:
+define void @global_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
+ store <64 x i1> %load, <64 x i1> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_i1_to_i32:
+; GCN: buffer_load_ubyte
+; GCN: buffer_store_dword
+define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+ %a = load i1, i1 addrspace(1)* %in
+ %ext = zext i1 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i1_to_i32:
+; GCN: buffer_load_ubyte
+; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
+; GCN: buffer_store_dword
+
+; EG: VTX_READ_8
+; EG: BFE_INT
+define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+ %a = load i1, i1 addrspace(1)* %in
+ %ext = sext i1 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i32:
+define void @global_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
+ %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
+ %ext = zext <1 x i1> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i32:
+define void @global_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
+ %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
+ %ext = sext <1 x i1> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i32:
+define void @global_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
+ %ext = zext <2 x i1> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i32:
+define void @global_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
+ %ext = sext <2 x i1> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i32:
+define void @global_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
+ %ext = zext <3 x i1> %load to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i32:
+define void @global_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(1)* %in
+ %ext = sext <3 x i1> %load to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i32:
+define void @global_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
+ %ext = zext <4 x i1> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i32:
+define void @global_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
+ %ext = sext <4 x i1> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i32:
+define void @global_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
+ %ext = zext <8 x i1> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i32:
+define void @global_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
+ %ext = sext <8 x i1> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i32:
+define void @global_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
+ %ext = zext <16 x i1> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i32:
+define void @global_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
+ %ext = sext <16 x i1> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i32:
+define void @global_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
+ %ext = zext <32 x i1> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i32:
+define void @global_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
+ %ext = sext <32 x i1> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i32:
+define void @global_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
+ %ext = zext <64 x i1> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i32:
+define void @global_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
+ %ext = sext <64 x i1> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_i1_to_i64:
+; GCN-DAG: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
+; GCN-DAG: v_and_b32_e32 {{v[0-9]+}}, 1, [[LOAD]]{{$}}
+; GCN: buffer_store_dwordx2
+define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+  %val = load i1, i1 addrspace(1)* %in
+  %res = zext i1 %val to i64
+  store i64 %res, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i1_to_i64:
+; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
+; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
+; GCN: buffer_store_dwordx2
+define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+  %val = load i1, i1 addrspace(1)* %in
+  %res = sext i1 %val to i64
+  store i64 %res, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i64:
+define void @global_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
+  %val = load <1 x i1>, <1 x i1> addrspace(1)* %in
+  %res = zext <1 x i1> %val to <1 x i64>
+  store <1 x i64> %res, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i64:
+define void @global_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 {
+  %val = load <1 x i1>, <1 x i1> addrspace(1)* %in
+  %res = sext <1 x i1> %val to <1 x i64>
+  store <1 x i64> %res, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i64:
+define void @global_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+  %val = load <2 x i1>, <2 x i1> addrspace(1)* %in
+  %res = zext <2 x i1> %val to <2 x i64>
+  store <2 x i64> %res, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i64:
+define void @global_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 {
+  %val = load <2 x i1>, <2 x i1> addrspace(1)* %in
+  %res = sext <2 x i1> %val to <2 x i64>
+  store <2 x i64> %res, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i64:
+define void @global_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+  %val = load <3 x i1>, <3 x i1> addrspace(1)* %in
+  %res = zext <3 x i1> %val to <3 x i64>
+  store <3 x i64> %res, <3 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i64:
+define void @global_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 {
+  %val = load <3 x i1>, <3 x i1> addrspace(1)* %in
+  %res = sext <3 x i1> %val to <3 x i64>
+  store <3 x i64> %res, <3 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i64:
+define void @global_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+  %val = load <4 x i1>, <4 x i1> addrspace(1)* %in
+  %res = zext <4 x i1> %val to <4 x i64>
+  store <4 x i64> %res, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i64:
+define void @global_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 {
+  %val = load <4 x i1>, <4 x i1> addrspace(1)* %in
+  %res = sext <4 x i1> %val to <4 x i64>
+  store <4 x i64> %res, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i64:
+define void @global_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+  %val = load <8 x i1>, <8 x i1> addrspace(1)* %in
+  %res = zext <8 x i1> %val to <8 x i64>
+  store <8 x i64> %res, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i64:
+define void @global_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 {
+  %val = load <8 x i1>, <8 x i1> addrspace(1)* %in
+  %res = sext <8 x i1> %val to <8 x i64>
+  store <8 x i64> %res, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i64:
+define void @global_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+  %val = load <16 x i1>, <16 x i1> addrspace(1)* %in
+  %res = zext <16 x i1> %val to <16 x i64>
+  store <16 x i64> %res, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i64:
+define void @global_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 {
+  %val = load <16 x i1>, <16 x i1> addrspace(1)* %in
+  %res = sext <16 x i1> %val to <16 x i64>
+  store <16 x i64> %res, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i64:
+define void @global_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+  %val = load <32 x i1>, <32 x i1> addrspace(1)* %in
+  %res = zext <32 x i1> %val to <32 x i64>
+  store <32 x i64> %res, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i64:
+define void @global_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 {
+  %val = load <32 x i1>, <32 x i1> addrspace(1)* %in
+  %res = sext <32 x i1> %val to <32 x i64>
+  store <32 x i64> %res, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i64:
+define void @global_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+  %val = load <64 x i1>, <64 x i1> addrspace(1)* %in
+  %res = zext <64 x i1> %val to <64 x i64>
+  store <64 x i64> %res, <64 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i64:
+define void @global_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 {
+  %val = load <64 x i1>, <64 x i1> addrspace(1)* %in
+  %res = sext <64 x i1> %val to <64 x i64>
+  store <64 x i64> %res, <64 x i64> addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
new file mode 100644
index 00000000000..d621c815b0d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -0,0 +1,774 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FIXME: r600 is broken because the bigger test cases spill, and spilling is not implemented for it
+
+; FUNC-LABEL: {{^}}global_load_i16:
+; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}}
+; GCN-HSA: flat_load_ushort
+
+; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+define void @global_load_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+entry:
+  %tmp = load i16, i16 addrspace(1)* %in
+  store i16 %tmp, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v2i16:
+; GCN-NOHSA: buffer_load_dword v
+; GCN-HSA: flat_load_dword v
+
+; EG: VTX_READ_32
+define void @global_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+entry:
+  %tmp = load <2 x i16>, <2 x i16> addrspace(1)* %in
+  store <2 x i16> %tmp, <2 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v3i16:
+; GCN-NOHSA: buffer_load_dwordx2 v
+; GCN-HSA: flat_load_dwordx2 v
+
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_16
+define void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
+entry:
+  %tmp = load <3 x i16>, <3 x i16> addrspace(1)* %in
+  store <3 x i16> %tmp, <3 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v4i16:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-HSA: flat_load_dwordx2
+
+; EG: VTX_READ_64
+define void @global_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
+entry:
+  %tmp = load <4 x i16>, <4 x i16> addrspace(1)* %in
+  store <4 x i16> %tmp, <4 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v8i16:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+define void @global_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) {
+entry:
+  %tmp = load <8 x i16>, <8 x i16> addrspace(1)* %in
+  store <8 x i16> %tmp, <8 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v16i16:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @global_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) {
+entry:
+  %tmp = load <16 x i16>, <16 x i16> addrspace(1)* %in
+  store <16 x i16> %tmp, <16 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_i16_to_i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_store_dword
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_store_dword
+
+; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+define void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+  %val = load i16, i16 addrspace(1)* %in
+  %res = zext i16 %val to i32
+  store i32 %res, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i16_to_i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_store_dword
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_store_dword
+
+; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
+; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
+; EG: 16
+define void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+  %val = load i16, i16 addrspace(1)* %in
+  %res = sext i16 %val to i32
+  store i32 %res, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
+  %val = load <1 x i16>, <1 x i16> addrspace(1)* %in
+  %res = zext <1 x i16> %val to <1 x i32>
+  store <1 x i32> %res, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-HSA: flat_load_sshort
+define void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
+  %val = load <1 x i16>, <1 x i16> addrspace(1)* %in
+  %res = sext <1 x i16> %val to <1 x i32>
+  store <1 x i32> %res, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+  %val = load <2 x i16>, <2 x i16> addrspace(1)* %in
+  %res = zext <2 x i16> %val to <2 x i32>
+  store <2 x i32> %res, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+
+; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: 16
+; EG-DAG: 16
+define void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+  %val = load <2 x i16>, <2 x i16> addrspace(1)* %in
+  %res = sext <2 x i16> %val to <2 x i32>
+  store <2 x i32> %res, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v3i16_to_v3i32:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-HSA: flat_load_dwordx2
+define void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
+entry:
+  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
+  %ext = zext <3 x i16> %ld to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v3i16_to_v3i32:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-HSA: flat_load_dwordx2
+define void @global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
+entry:
+  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
+  %ext = sext <3 x i16> %ld to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i16_to_v4i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+; EG: VTX_READ_16
+define void @global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
+  %ext = zext <4 x i16> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+
+; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
+; EG-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
+; EG-DAG: 16
+; EG-DAG: 16
+; EG-DAG: 16
+; EG-DAG: 16
+define void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+  %val = load <4 x i16>, <4 x i16> addrspace(1)* %in
+  %res = sext <4 x i16> %val to <4 x i32>
+  store <4 x i32> %res, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+  %val = load <8 x i16>, <8 x i16> addrspace(1)* %in
+  %res = zext <8 x i16> %val to <8 x i32>
+  store <8 x i32> %res, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+define void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+  %val = load <8 x i16>, <8 x i16> addrspace(1)* %in
+  %res = sext <8 x i16> %val to <8 x i32>
+  store <8 x i32> %res, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
+  %val = load <16 x i16>, <16 x i16> addrspace(1)* %in
+  %res = zext <16 x i16> %val to <16 x i32>
+  store <16 x i32> %res, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i32:
+define void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
+  %val = load <16 x i16>, <16 x i16> addrspace(1)* %in
+  %res = sext <16 x i16> %val to <16 x i32>
+  store <16 x i32> %res, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
+  %val = load <32 x i16>, <32 x i16> addrspace(1)* %in
+  %res = zext <32 x i16> %val to <32 x i32>
+  store <32 x i32> %res, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i32:
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+; GCN-NOHSA: buffer_load_sshort
+
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+; GCN-HSA: flat_load_sshort
+define void @global_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
+  %val = load <32 x i16>, <32 x i16> addrspace(1)* %in
+  %res = sext <32 x i16> %val to <32 x i32>
+  store <32 x i32> %res, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+define void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
+  %val = load <64 x i16>, <64 x i16> addrspace(1)* %in
+  %res = zext <64 x i16> %val to <64 x i32>
+  store <64 x i32> %res, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i32:
+define void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
+  %val = load <64 x i16>, <64 x i16> addrspace(1)* %in
+  %res = sext <64 x i16> %val to <64 x i32>
+  store <64 x i32> %res, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_i16_to_i64:
+; GCN-NOHSA-DAG: buffer_load_ushort v[[LO:[0-9]+]],
+; GCN-HSA-DAG: flat_load_ushort v[[LO:[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+  %val = load i16, i16 addrspace(1)* %in
+  %res = zext i16 %val to i64
+  store i64 %res, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i16_to_i64:
+; GCN-NOHSA-DAG: buffer_load_sshort v[[LO:[0-9]+]],
+; GCN-HSA-DAG: flat_load_sshort v[[LO:[0-9]+]],
+; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+  %val = load i16, i16 addrspace(1)* %in
+  %res = sext i16 %val to i64
+  store i64 %res, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i64:
+define void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
+  %val = load <1 x i16>, <1 x i16> addrspace(1)* %in
+  %res = zext <1 x i16> %val to <1 x i64>
+  store <1 x i64> %res, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i64:
+define void @global_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
+  %val = load <1 x i16>, <1 x i16> addrspace(1)* %in
+  %res = sext <1 x i16> %val to <1 x i64>
+  store <1 x i64> %res, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i64:
+define void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+  %val = load <2 x i16>, <2 x i16> addrspace(1)* %in
+  %res = zext <2 x i16> %val to <2 x i64>
+  store <2 x i64> %res, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i64:
+define void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+  %val = load <2 x i16>, <2 x i16> addrspace(1)* %in
+  %res = sext <2 x i16> %val to <2 x i64>
+  store <2 x i64> %res, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i16_to_v4i64:
+define void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+  %val = load <4 x i16>, <4 x i16> addrspace(1)* %in
+  %res = zext <4 x i16> %val to <4 x i64>
+  store <4 x i64> %res, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i64:
+define void @global_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+  %val = load <4 x i16>, <4 x i16> addrspace(1)* %in
+  %res = sext <4 x i16> %val to <4 x i64>
+  store <4 x i64> %res, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i64:
+define void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+  %val = load <8 x i16>, <8 x i16> addrspace(1)* %in
+  %res = zext <8 x i16> %val to <8 x i64>
+  store <8 x i64> %res, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i64:
+define void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+  %val = load <8 x i16>, <8 x i16> addrspace(1)* %in
+  %res = sext <8 x i16> %val to <8 x i64>
+  store <8 x i64> %res, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i64:
+define void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
+  %val = load <16 x i16>, <16 x i16> addrspace(1)* %in
+  %res = zext <16 x i16> %val to <16 x i64>
+  store <16 x i64> %res, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i64:
+define void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
+  %val = load <16 x i16>, <16 x i16> addrspace(1)* %in
+  %res = sext <16 x i16> %val to <16 x i64>
+  store <16 x i64> %res, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i64:
+define void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
+  %val = load <32 x i16>, <32 x i16> addrspace(1)* %in
+  %res = zext <32 x i16> %val to <32 x i64>
+  store <32 x i64> %res, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i64:
+define void @global_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
+  %val = load <32 x i16>, <32 x i16> addrspace(1)* %in
+  %res = sext <32 x i16> %val to <32 x i64>
+  store <32 x i64> %res, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; ; XFUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i64:
+; define void @global_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
+;   %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
+;   %ext = zext <64 x i16> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+; ; XFUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i64:
+; define void @global_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
+;   %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
+;   %ext = sext <64 x i16> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i32.ll b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll
new file mode 100644
index 00000000000..c25470b1b78
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i32.ll
@@ -0,0 +1,523 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}global_load_i32:
+; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}
+; GCN-HSA: flat_load_dword
+
+; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+define void @global_load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+entry:
+ %ld = load i32, i32 addrspace(1)* %in
+ store i32 %ld, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v2i32:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-HSA: flat_load_dwordx2
+
+; EG: VTX_READ_64
+define void @global_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
+ store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v3i32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+define void @global_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <3 x i32>, <3 x i32> addrspace(1)* %in
+ store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v4i32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+define void @global_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
+ store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v8i32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @global_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
+ store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v16i32:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @global_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
+ store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_i32_to_i64:
+; GCN-NOHSA-DAG: buffer_load_dword v[[LO:[0-9]+]],
+; GCN-HSA-DAG: flat_load_dword v[[LO:[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
+
+; EG: MEM_RAT
+; EG: MEM_RAT
+define void @global_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %ld = load i32, i32 addrspace(1)* %in
+ %ext = zext i32 %ld to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i32_to_i64:
+; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
+; GCN-HSA: flat_load_dword v[[LO:[0-9]+]]
+; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+
+; EG: MEM_RAT
+; EG: MEM_RAT
+; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
+; EG: 31
+define void @global_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %ld = load i32, i32 addrspace(1)* %in
+ %ext = sext i32 %ld to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i32_to_v1i64:
+; GCN-NOHSA: buffer_load_dword
+; GCN-NOHSA: buffer_store_dwordx2
+
+; GCN-HSA: flat_load_dword
+; GCN-HSA: flat_store_dwordx2
+define void @global_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
+ %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
+ %ext = zext <1 x i32> %ld to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i32_to_v1i64:
+; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
+; GCN-HSA: flat_load_dword v[[LO:[0-9]+]]
+; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @global_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
+ %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
+ %ext = sext <1 x i32> %ld to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i32_to_v2i64:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-HSA: flat_load_dwordx2
+; GCN-HSA: flat_store_dwordx4
+define void @global_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
+ %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
+ %ext = zext <2 x i32> %ld to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i32_to_v2i64:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-HSA: flat_load_dwordx2
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @global_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
+ %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
+ %ext = sext <2 x i32> %ld to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i32_to_v4i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+define void @global_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+ %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
+ %ext = zext <4 x i32> %ld to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i32_to_v4i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @global_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+ %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
+ %ext = sext <4 x i32> %ld to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i32_to_v8i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @global_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
+ %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
+ %ext = zext <8 x i32> %ld to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i32_to_v8i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @global_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
+ %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
+ %ext = sext <8 x i32> %ld to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i32_to_v16i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @global_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
+ %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
+ %ext = sext <16 x i32> %ld to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i32_to_v16i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+define void @global_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
+ %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
+ %ext = zext <16 x i32> %ld to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i32_to_v32i64:
+
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+; GCN-DAG: v_ashrrev_i32
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+; GCN-NOHSA: buffer_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+; GCN-HSA: flat_store_dwordx4
+
+define void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
+ %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
+ %ext = sext <32 x i32> %ld to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i32_to_v32i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+; GCN-NOHSA-DAG: buffer_store_dwordx4
+
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+; GCN-HSA-DAG: flat_store_dwordx4
+define void @global_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
+ %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
+ %ext = zext <32 x i32> %ld to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i64.ll b/llvm/test/CodeGen/AMDGPU/load-global-i64.ll
new file mode 100644
index 00000000000..b5367319ec0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i64.ll
@@ -0,0 +1,125 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}global_load_i64:
+; GCN-NOHSA: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; GCN-NOHSA: buffer_store_dwordx2 [[VAL]]
+
+; GCN-HSA: flat_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; GCN-HSA: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]
+
+; EG: VTX_READ_64
+define void @global_load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+ %ld = load i64, i64 addrspace(1)* %in
+ store i64 %ld, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v2i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+define void @global_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <2 x i64>, <2 x i64> addrspace(1)* %in
+ store <2 x i64> %ld, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v3i64:
+; GCN-NOHSA-DAG: buffer_load_dwordx4
+; GCN-NOHSA-DAG: buffer_load_dwordx2
+; GCN-HSA-DAG: flat_load_dwordx4
+; GCN-HSA-DAG: flat_load_dwordx2
+
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+; EG-DAG: VTX_READ_32
+define void @global_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <3 x i64>, <3 x i64> addrspace(1)* %in
+ store <3 x i64> %ld, <3 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v4i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @global_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <4 x i64>, <4 x i64> addrspace(1)* %in
+ store <4 x i64> %ld, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v8i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @global_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <8 x i64>, <8 x i64> addrspace(1)* %in
+ store <8 x i64> %ld, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v16i64:
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+; EG: VTX_READ_128
+define void @global_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <16 x i64>, <16 x i64> addrspace(1)* %in
+ store <16 x i64> %ld, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
new file mode 100644
index 00000000000..c58c3bfa73a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
@@ -0,0 +1,579 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}global_load_i8:
+; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
+; GCN-HSA: flat_load_ubyte
+
+; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+define void @global_load_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+entry:
+ %ld = load i8, i8 addrspace(1)* %in
+ store i8 %ld, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v2i8:
+; GCN-NOHSA: buffer_load_ushort v
+; GCN-HSA: flat_load_ushort v
+
+; EG: VTX_READ_16
+define void @global_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in
+ store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v3i8:
+; GCN-NOHSA: buffer_load_dword v
+; GCN-HSA: flat_load_dword v
+
+; EG-DAG: VTX_READ_32
+define void @global_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
+ store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v4i8:
+; GCN-NOHSA: buffer_load_dword v
+; GCN-HSA: flat_load_dword v
+
+; EG: VTX_READ_32
+define void @global_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <4 x i8>, <4 x i8> addrspace(1)* %in
+ store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v8i8:
+; GCN-NOHSA: buffer_load_dwordx2
+; GCN-HSA: flat_load_dwordx2
+
+; EG: VTX_READ_64
+define void @global_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <8 x i8>, <8 x i8> addrspace(1)* %in
+ store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_load_v16i8:
+; GCN-NOHSA: buffer_load_dwordx4
+
+; GCN-HSA: flat_load_dwordx4
+
+; EG: VTX_READ_128
+define void @global_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <16 x i8>, <16 x i8> addrspace(1)* %in
+ store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_i8_to_i32:
+; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
+; GCN-HSA: flat_load_ubyte
+
+; EG: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+define void @global_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+ %a = load i8, i8 addrspace(1)* %in
+ %ext = zext i8 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i8_to_i32:
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-HSA: flat_load_sbyte
+
+; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
+; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
+; EG: 8
+define void @global_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+ %ld = load i8, i8 addrspace(1)* %in
+ %ext = sext i8 %ld to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i32:
+define void @global_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
+ %ext = zext <1 x i8> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i32:
+define void @global_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
+ %ext = sext <1 x i8> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+define void @global_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
+ %ext = zext <2 x i8> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i32:
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+
+; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: 8
+; EG-DAG: 8
+define void @global_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
+ %ext = sext <2 x i8> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v3i8_to_v3i32:
+; GCN-NOHSA: buffer_load_dword v
+; GCN-HSA: flat_load_dword v
+
+; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
+; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
+; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,
+define void @global_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
+ %ext = zext <3 x i8> %ld to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v3i8_to_v3i32:
+; GCN-NOHSA: buffer_load_dword v
+; GCN-HSA: flat_load_dword v
+
+; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
+; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
+define void @global_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
+entry:
+ %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
+ %ext = sext <3 x i8> %ld to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+; EG: VTX_READ_8
+define void @global_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
+ %ext = zext <4 x i8> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i32:
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-NOHSA: buffer_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+; GCN-HSA: flat_load_sbyte
+
+; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
+; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
+; EG-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
+; EG-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
+; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
+; EG-DAG: 8
+; EG-DAG: 8
+; EG-DAG: 8
+; EG-DAG: 8
+define void @global_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
+ %ext = sext <4 x i8> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i32:
+define void @global_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
+ %ext = zext <8 x i8> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i32:
+define void @global_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
+ %ext = sext <8 x i8> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i32:
+define void @global_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
+ %ext = zext <16 x i8> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i32:
+define void @global_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
+ %ext = sext <16 x i8> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i32:
+define void @global_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
+ %ext = zext <32 x i8> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i32:
+define void @global_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
+ %ext = sext <32 x i8> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i32:
+define void @global_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+ %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
+ %ext = zext <64 x i8> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i32:
+define void @global_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+ %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
+ %ext = sext <64 x i8> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_i8_to_i64:
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+
+; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+
+; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
+define void @global_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+ %a = load i8, i8 addrspace(1)* %in
+ %ext = zext i8 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i8_to_i64:
+; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
+; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
+; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+
+; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @global_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+ %a = load i8, i8 addrspace(1)* %in
+ %ext = sext i8 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i64:
+define void @global_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
+ %ext = zext <1 x i8> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i64:
+define void @global_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
+ %ext = sext <1 x i8> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i64:
+define void @global_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
+ %ext = zext <2 x i8> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i64:
+define void @global_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
+ %ext = sext <2 x i8> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i64:
+define void @global_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
+ %ext = zext <4 x i8> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i64:
+define void @global_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
+ %ext = sext <4 x i8> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i64:
+define void @global_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
+ %ext = zext <8 x i8> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i64:
+define void @global_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
+ %ext = sext <8 x i8> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i64:
+define void @global_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
+ %ext = zext <16 x i8> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i64:
+define void @global_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
+ %ext = sext <16 x i8> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i64:
+define void @global_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
+ %ext = zext <32 x i8> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i64:
+define void @global_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
+ %ext = sext <32 x i8> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i64:
+; define void @global_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
+; %ext = zext <64 x i8> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i64:
+; define void @global_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
+; %ext = sext <64 x i8> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; FUNC-LABEL: {{^}}global_zextload_i8_to_i16:
+; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
+; GCN-NOHSA: buffer_store_short v[[VAL]]
+
+; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
+; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
+define void @global_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+ %a = load i8, i8 addrspace(1)* %in
+ %ext = zext i8 %a to i16
+ store i16 %ext, i16 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i8_to_i16:
+; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
+; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],
+
+; GCN-NOHSA: buffer_store_short v[[VAL]]
+; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
+define void @global_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+ %a = load i8, i8 addrspace(1)* %in
+ %ext = sext i8 %a to i16
+ store i16 %ext, i16 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i16:
+define void @global_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
+ %ext = zext <1 x i8> %load to <1 x i16>
+ store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i16:
+define void @global_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
+ %ext = sext <1 x i8> %load to <1 x i16>
+ store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i16:
+define void @global_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
+ %ext = zext <2 x i8> %load to <2 x i16>
+ store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i16:
+define void @global_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
+ %ext = sext <2 x i8> %load to <2 x i16>
+ store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i16:
+define void @global_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
+ %ext = zext <4 x i8> %load to <4 x i16>
+ store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i16:
+define void @global_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
+ %ext = sext <4 x i8> %load to <4 x i16>
+ store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i16:
+define void @global_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
+ %ext = zext <8 x i8> %load to <8 x i16>
+ store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i16:
+define void @global_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
+ %ext = sext <8 x i8> %load to <8 x i16>
+ store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i16:
+define void @global_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
+ %ext = zext <16 x i8> %load to <16 x i16>
+ store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i16:
+define void @global_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
+ %ext = sext <16 x i8> %load to <16 x i16>
+ store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i16:
+define void @global_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
+ %ext = zext <32 x i8> %load to <32 x i16>
+ store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i16:
+define void @global_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
+ %ext = sext <32 x i8> %load to <32 x i16>
+ store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i16:
+; define void @global_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
+; %ext = zext <64 x i8> %load to <64 x i16>
+; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i16:
+; define void @global_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
+; %ext = sext <64 x i8> %load to <64 x i16>
+; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
+; ret void
+; }
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-i1.ll b/llvm/test/CodeGen/AMDGPU/load-i1.ll
deleted file mode 100644
index e71cf53e24b..00000000000
--- a/llvm/test/CodeGen/AMDGPU/load-i1.ll
+++ /dev/null
@@ -1,149 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-
-; FUNC-LABEL: {{^}}global_copy_i1_to_i1:
-; SI: buffer_load_ubyte
-; SI: v_and_b32_e32 v{{[0-9]+}}, 1
-; SI: buffer_store_byte
-; SI: s_endpgm
-
-; EG: VTX_READ_8
-; EG: AND_INT
-define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- store i1 %load, i1 addrspace(1)* %out, align 1
- ret void
-}
-
-; FUNC-LABEL: {{^}}local_copy_i1_to_i1:
-; SI: ds_read_u8
-; SI: v_and_b32_e32 v{{[0-9]+}}, 1
-; SI: ds_write_b8
-; SI: s_endpgm
-
-; EG: LDS_UBYTE_READ_RET
-; EG: AND_INT
-; EG: LDS_BYTE_WRITE
-define void @local_copy_i1_to_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) nounwind {
- %load = load i1, i1 addrspace(3)* %in
- store i1 %load, i1 addrspace(3)* %out, align 1
- ret void
-}
-
-; FUNC-LABEL: {{^}}constant_copy_i1_to_i1:
-; SI: buffer_load_ubyte
-; SI: v_and_b32_e32 v{{[0-9]+}}, 1
-; SI: buffer_store_byte
-; SI: s_endpgm
-
-; EG: VTX_READ_8
-; EG: AND_INT
-define void @constant_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) nounwind {
- %load = load i1, i1 addrspace(2)* %in
- store i1 %load, i1 addrspace(1)* %out, align 1
- ret void
-}
-
-; FUNC-LABEL: {{^}}global_sextload_i1_to_i32:
-; SI: buffer_load_ubyte
-; SI: v_bfe_i32
-; SI: buffer_store_dword
-; SI: s_endpgm
-
-; EG: VTX_READ_8
-; EG: BFE_INT
-define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = sext i1 %load to i32
- store i32 %ext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}global_zextload_i1_to_i32:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-
-define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = zext i1 %load to i32
- store i32 %ext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}global_sextload_i1_to_i64:
-; SI: buffer_load_ubyte
-; SI: v_bfe_i32
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = sext i1 %load to i64
- store i64 %ext, i64 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}global_zextload_i1_to_i64:
-; SI-DAG: buffer_load_ubyte
-; SI-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1, i1 addrspace(1)* %in
- %ext = zext i1 %load to i64
- store i64 %ext, i64 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg:
-; SI: buffer_load_ubyte
-; SI: v_and_b32_e32
-; SI: buffer_store_byte
-; SI: s_endpgm
-define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
- store i1 %x, i1 addrspace(1)* %out, align 1
- ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg_zext_i32:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
- %ext = zext i1 %x to i32
- store i32 %ext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg_zext_i64:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
- %ext = zext i1 %x to i64
- store i64 %ext, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg_sext_i32:
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
-define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
- %ext = sext i1 %x to i32
- store i32 %ext, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}i1_arg_sext_i64:
-; SI: buffer_load_ubyte
-; SI: v_bfe_i32
-; SI: v_ashrrev_i32
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
- %ext = sext i1 %x to i64
- store i64 %ext, i64 addrspace(1)* %out, align 8
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-f32.ll b/llvm/test/CodeGen/AMDGPU/load-local-f32.ll
new file mode 100644
index 00000000000..17c09d149ac
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-local-f32.ll
@@ -0,0 +1,109 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}load_f32_local:
+; GCN: s_mov_b32 m0
+; GCN: ds_read_b32
+
+; EG: LDS_READ_RET
+define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) #0 {
+entry:
+ %tmp0 = load float, float addrspace(3)* %in
+ store float %tmp0, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}load_v2f32_local:
+; GCN: s_mov_b32 m0
+; GCN: ds_read_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) #0 {
+entry:
+ %tmp0 = load <2 x float>, <2 x float> addrspace(3)* %in
+ store <2 x float> %tmp0, <2 x float> addrspace(1)* %out
+ ret void
+}
+
+; FIXME: should only do one b64 load
+; FUNC-LABEL: {{^}}local_load_v3f32:
+; GCN: ds_read2_b64
+; GCN: s_waitcnt
+; GCN-DAG: ds_write_b64
+; GCN-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:8{{$}}
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v3f32(<3 x float> addrspace(3)* %out, <3 x float> addrspace(3)* %in) #0 {
+entry:
+ %tmp0 = load <3 x float>, <3 x float> addrspace(3)* %in
+ store <3 x float> %tmp0, <3 x float> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v4f32:
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v4f32(<4 x float> addrspace(3)* %out, <4 x float> addrspace(3)* %in) #0 {
+entry:
+ %tmp0 = load <4 x float>, <4 x float> addrspace(3)* %in
+ store <4 x float> %tmp0, <4 x float> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v8f32:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v8f32(<8 x float> addrspace(3)* %out, <8 x float> addrspace(3)* %in) #0 {
+entry:
+ %tmp0 = load <8 x float>, <8 x float> addrspace(3)* %in
+ store <8 x float> %tmp0, <8 x float> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v16f32:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v16f32(<16 x float> addrspace(3)* %out, <16 x float> addrspace(3)* %in) #0 {
+entry:
+ %tmp0 = load <16 x float>, <16 x float> addrspace(3)* %in
+ store <16 x float> %tmp0, <16 x float> addrspace(3)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-f64.ll b/llvm/test/CodeGen/AMDGPU/load-local-f64.ll
new file mode 100644
index 00000000000..27d39b7e9d7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-local-f64.ll
@@ -0,0 +1,154 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}local_load_f64:
+; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}}
+; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]]
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_f64(double addrspace(3)* %out, double addrspace(3)* %in) #0 {
+ %ld = load double, double addrspace(3)* %in
+ store double %ld, double addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v2f64:
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v2f64(<2 x double> addrspace(3)* %out, <2 x double> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <2 x double>, <2 x double> addrspace(3)* %in
+ store <2 x double> %ld, <2 x double> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v3f64:
+; GCN-DAG: ds_read2_b64
+; GCN-DAG: ds_read_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v3f64(<3 x double> addrspace(3)* %out, <3 x double> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <3 x double>, <3 x double> addrspace(3)* %in
+ store <3 x double> %ld, <3 x double> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v4f64:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v4f64(<4 x double> addrspace(3)* %out, <4 x double> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <4 x double>, <4 x double> addrspace(3)* %in
+ store <4 x double> %ld, <4 x double> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v8f64:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v8f64(<8 x double> addrspace(3)* %out, <8 x double> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <8 x double>, <8 x double> addrspace(3)* %in
+ store <8 x double> %ld, <8 x double> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v16f64:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v16f64(<16 x double> addrspace(3)* %out, <16 x double> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <16 x double>, <16 x double> addrspace(3)* %in
+ store <16 x double> %ld, <16 x double> addrspace(3)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
new file mode 100644
index 00000000000..2eed9917b5e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
@@ -0,0 +1,371 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}local_load_i1:
+; GCN: ds_read_u8
+; GCN: v_and_b32_e32 v{{[0-9]+}}, 1
+; GCN: ds_write_b8
+
+; EG: LDS_UBYTE_READ_RET
+; EG: AND_INT
+; EG: LDS_BYTE_WRITE
+define void @local_load_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+ %load = load i1, i1 addrspace(3)* %in
+ store i1 %load, i1 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v2i1:
+define void @local_load_v2i1(<2 x i1> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
+ store <2 x i1> %load, <2 x i1> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v3i1:
+define void @local_load_v3i1(<3 x i1> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
+ store <3 x i1> %load, <3 x i1> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v4i1:
+define void @local_load_v4i1(<4 x i1> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
+ store <4 x i1> %load, <4 x i1> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v8i1:
+define void @local_load_v8i1(<8 x i1> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
+ store <8 x i1> %load, <8 x i1> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v16i1:
+define void @local_load_v16i1(<16 x i1> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
+ store <16 x i1> %load, <16 x i1> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v32i1:
+define void @local_load_v32i1(<32 x i1> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
+ store <32 x i1> %load, <32 x i1> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v64i1:
+define void @local_load_v64i1(<64 x i1> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
+ store <64 x i1> %load, <64 x i1> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_i1_to_i32:
+; GCN: ds_read_u8
+; GCN: ds_write_b32
+define void @local_zextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+ %a = load i1, i1 addrspace(3)* %in
+ %ext = zext i1 %a to i32
+ store i32 %ext, i32 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i1_to_i32:
+; GCN: ds_read_u8
+; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
+; GCN: ds_write_b32
+
+; EG: LDS_UBYTE_READ_RET
+; EG: BFE_INT
+define void @local_sextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+ %a = load i1, i1 addrspace(3)* %in
+ %ext = sext i1 %a to i32
+ store i32 %ext, i32 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i32:
+define void @local_zextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
+ %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
+ %ext = zext <1 x i1> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i32:
+define void @local_sextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
+ %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
+ %ext = sext <1 x i1> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i32:
+define void @local_zextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
+ %ext = zext <2 x i1> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i32:
+define void @local_sextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
+ %ext = sext <2 x i1> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i32:
+define void @local_zextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
+ %ext = zext <3 x i1> %load to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i32:
+define void @local_sextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
+ %ext = sext <3 x i1> %load to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i32:
+define void @local_zextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
+ %ext = zext <4 x i1> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i32:
+define void @local_sextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
+ %ext = sext <4 x i1> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i32:
+define void @local_zextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
+ %ext = zext <8 x i1> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i32:
+define void @local_sextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
+ %ext = sext <8 x i1> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i32:
+define void @local_zextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
+ %ext = zext <16 x i1> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i32:
+define void @local_sextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
+ %ext = sext <16 x i1> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i32:
+define void @local_zextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
+ %ext = zext <32 x i1> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i32:
+define void @local_sextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
+ %ext = sext <32 x i1> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i32:
+define void @local_zextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
+ %ext = zext <64 x i1> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i32:
+define void @local_sextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
+ %ext = sext <64 x i1> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_i1_to_i64:
+; GCN-DAG: ds_read_u8 [[LOAD:v[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
+; GCN: ds_write_b64
+define void @local_zextload_i1_to_i64(i64 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+ %a = load i1, i1 addrspace(3)* %in
+ %ext = zext i1 %a to i64
+ store i64 %ext, i64 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i1_to_i64:
+; GCN: ds_read_u8 [[LOAD:v[0-9]+]],
+; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
+; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
+; GCN: ds_write_b64
+define void @local_sextload_i1_to_i64(i64 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
+ %a = load i1, i1 addrspace(3)* %in
+ %ext = sext i1 %a to i64
+ store i64 %ext, i64 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i64:
+define void @local_zextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
+ %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
+ %ext = zext <1 x i1> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i64:
+define void @local_sextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 {
+ %load = load <1 x i1>, <1 x i1> addrspace(3)* %in
+ %ext = sext <1 x i1> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i64:
+define void @local_zextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
+ %ext = zext <2 x i1> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i64:
+define void @local_sextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 {
+ %load = load <2 x i1>, <2 x i1> addrspace(3)* %in
+ %ext = sext <2 x i1> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i64:
+define void @local_zextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
+ %ext = zext <3 x i1> %load to <3 x i64>
+ store <3 x i64> %ext, <3 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i64:
+define void @local_sextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 {
+ %load = load <3 x i1>, <3 x i1> addrspace(3)* %in
+ %ext = sext <3 x i1> %load to <3 x i64>
+ store <3 x i64> %ext, <3 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i64:
+define void @local_zextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
+ %ext = zext <4 x i1> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i64:
+define void @local_sextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 {
+ %load = load <4 x i1>, <4 x i1> addrspace(3)* %in
+ %ext = sext <4 x i1> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i64:
+define void @local_zextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
+ %ext = zext <8 x i1> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i64:
+define void @local_sextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 {
+ %load = load <8 x i1>, <8 x i1> addrspace(3)* %in
+ %ext = sext <8 x i1> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i64:
+define void @local_zextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
+ %ext = zext <16 x i1> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i64:
+define void @local_sextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 {
+ %load = load <16 x i1>, <16 x i1> addrspace(3)* %in
+ %ext = sext <16 x i1> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i64:
+define void @local_zextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
+ %ext = zext <32 x i1> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i64:
+define void @local_sextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 {
+ %load = load <32 x i1>, <32 x i1> addrspace(3)* %in
+ %ext = sext <32 x i1> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i64:
+define void @local_zextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
+ %ext = zext <64 x i1> %load to <64 x i64>
+ store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i64:
+define void @local_sextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 {
+ %load = load <64 x i1>, <64 x i1> addrspace(3)* %in
+ %ext = sext <64 x i1> %load to <64 x i64>
+ store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
new file mode 100644
index 00000000000..6db67d77425
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
@@ -0,0 +1,608 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}local_load_i16:
+; GCN: ds_read_u16 v{{[0-9]+}}
+
+; EG: LDS_USHORT_READ_RET
+define void @local_load_i16(i16 addrspace(3)* %out, i16 addrspace(3)* %in) {
+entry:
+ %ld = load i16, i16 addrspace(3)* %in
+ store i16 %ld, i16 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v2i16:
+; GCN: ds_read_b32
+
+; EG: LDS_READ_RET
+define void @local_load_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) {
+entry:
+ %ld = load <2 x i16>, <2 x i16> addrspace(3)* %in
+ store <2 x i16> %ld, <2 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v3i16:
+; GCN: ds_read_b64
+; GCN-DAG: ds_write_b32
+; GCN-DAG: ds_write_b16
+
+; EG-DAG: LDS_USHORT_READ_RET
+; EG-DAG: LDS_READ_RET
+define void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
+entry:
+ %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
+ store <3 x i16> %ld, <3 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v4i16:
+; GCN: ds_read_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v4i16(<4 x i16> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) {
+entry:
+ %ld = load <4 x i16>, <4 x i16> addrspace(3)* %in
+ store <4 x i16> %ld, <4 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v8i16:
+; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1{{$}}
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v8i16(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) {
+entry:
+ %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in
+ store <8 x i16> %ld, <8 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v16i16:
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}}
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1{{$}}
+
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v16i16(<16 x i16> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) {
+entry:
+ %ld = load <16 x i16>, <16 x i16> addrspace(3)* %in
+ store <16 x i16> %ld, <16 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_i16_to_i32:
+; GCN: ds_read_u16
+; GCN: ds_write_b32
+
+; EG: LDS_USHORT_READ_RET
+define void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
+ %a = load i16, i16 addrspace(3)* %in
+ %ext = zext i16 %a to i32
+ store i32 %ext, i32 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i16_to_i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_i16
+
+; EG: LDS_USHORT_READ_RET
+; EG: BFE_INT
+define void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
+ %a = load i16, i16 addrspace(3)* %in
+ %ext = sext i16 %a to i32
+ store i32 %ext, i32 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i32:
+; GCN: ds_read_u16
+define void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
+ %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
+ %ext = zext <1 x i16> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i32:
+; GCN: ds_read_i16
+define void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
+ %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
+ %ext = sext <1 x i16> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+
+; EG: LDS_USHORT_READ_RET
+; EG: LDS_USHORT_READ_RET
+define void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
+ %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
+ %ext = zext <2 x i16> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+
+; EG-DAG: LDS_USHORT_READ_RET
+; EG-DAG: LDS_USHORT_READ_RET
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+define void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
+ %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
+ %ext = sext <2 x i16> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_local_zextload_v3i16_to_v3i32:
+; GCN: ds_read_b64
+; GCN-DAG: ds_write_b32
+; GCN-DAG: ds_write_b64
+define void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
+entry:
+ %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
+ %ext = zext <3 x i16> %ld to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_local_sextload_v3i16_to_v3i32:
+; GCN: ds_read_b64
+; GCN-DAG: ds_write_b32
+; GCN-DAG: ds_write_b64
+define void @local_local_sextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
+entry:
+ %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
+ %ext = sext <3 x i16> %ld to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_local_zextload_v4i16_to_v4i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+
+; EG: LDS_USHORT_READ_RET
+; EG: LDS_USHORT_READ_RET
+; EG: LDS_USHORT_READ_RET
+; EG: LDS_USHORT_READ_RET
+define void @local_local_zextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
+ %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
+ %ext = zext <4 x i16> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+
+; EG-DAG: LDS_USHORT_READ_RET
+; EG-DAG: LDS_USHORT_READ_RET
+; EG-DAG: LDS_USHORT_READ_RET
+; EG-DAG: LDS_USHORT_READ_RET
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+define void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
+ %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
+ %ext = sext <4 x i16> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i32:
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+define void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
+ %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
+ %ext = zext <8 x i16> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i32:
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+define void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
+ %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
+ %ext = sext <8 x i16> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32:
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+define void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
+ %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
+ %ext = zext <16 x i16> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32:
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+define void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
+ %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
+ %ext = sext <16 x i16> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32:
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+
+define void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
+ %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
+ %ext = zext <32 x i16> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32:
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+; GCN: ds_read_i16
+define void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
+ %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
+ %ext = sext <32 x i16> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32:
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+
+define void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
+ %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
+ %ext = zext <64 x i16> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i32:
+define void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
+ %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
+ %ext = sext <64 x i16> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_i16_to_i64:
+; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]],
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+
+; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
+define void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
+ %a = load i16, i16 addrspace(3)* %in
+ %ext = zext i16 %a to i64
+ store i64 %ext, i64 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i16_to_i64:
+; GCN: ds_read_i16 v[[LO:[0-9]+]],
+; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+
+; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
+define void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
+ %a = load i16, i16 addrspace(3)* %in
+ %ext = sext i16 %a to i64
+ store i64 %ext, i64 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i64:
+define void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
+ %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
+ %ext = zext <1 x i16> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i64:
+define void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
+ %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
+ %ext = sext <1 x i16> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i64:
+define void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
+ %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
+ %ext = zext <2 x i16> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i64:
+define void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
+ %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
+ %ext = sext <2 x i16> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i64:
+define void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
+ %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
+ %ext = zext <4 x i16> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i64:
+define void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
+ %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
+ %ext = sext <4 x i16> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i64:
+define void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
+ %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
+ %ext = zext <8 x i16> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i64:
+define void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
+ %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
+ %ext = sext <8 x i16> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i64:
+define void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
+ %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
+ %ext = zext <16 x i16> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i64:
+define void @local_sextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
+ %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
+ %ext = sext <16 x i16> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i64:
+define void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
+ %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
+ %ext = zext <32 x i16> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i64:
+define void @local_sextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
+ %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
+ %ext = sext <32 x i16> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+ ret void
+}
+
+; ; XFUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i64:
+; define void @local_zextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
+; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
+; %ext = zext <64 x i16> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
+; ret void
+; }
+
+; ; XFUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i64:
+; define void @local_sextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
+; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
+; %ext = sext <64 x i16> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
+; ret void
+; }
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i32.ll b/llvm/test/CodeGen/AMDGPU/load-local-i32.ll
new file mode 100644
index 00000000000..24bdb15d473
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i32.ll
@@ -0,0 +1,182 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}local_load_i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0, -1
+; GCN: ds_read_b32
+
+; EG: LDS_READ_RET
+define void @local_load_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
+entry:
+ %ld = load i32, i32 addrspace(3)* %in
+ store i32 %ld, i32 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v2i32:
+; GCN: ds_read_b64
+define void @local_load_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
+ store <2 x i32> %ld, <2 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v3i32:
+; GCN-DAG: ds_read_b64
+; GCN-DAG: ds_read_b32
+define void @local_load_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <3 x i32>, <3 x i32> addrspace(3)* %in
+ store <3 x i32> %ld, <3 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v4i32:
+; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1{{$}}
+
+define void @local_load_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
+ store <4 x i32> %ld, <4 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v8i32:
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}}
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1{{$}}
+define void @local_load_v8i32(<8 x i32> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
+ store <8 x i32> %ld, <8 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v16i32:
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7{{$}}
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:3 offset1:4{{$}}
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:5{{$}}
+; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}}
+define void @local_load_v16i32(<16 x i32> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
+ store <16 x i32> %ld, <16 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_i32_to_i64:
+define void @local_zextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
+ %ld = load i32, i32 addrspace(3)* %in
+ %ext = zext i32 %ld to i64
+ store i64 %ext, i64 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i32_to_i64:
+define void @local_sextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
+ %ld = load i32, i32 addrspace(3)* %in
+ %ext = sext i32 %ld to i64
+ store i64 %ext, i64 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i32_to_v1i64:
+define void @local_zextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
+ %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
+ %ext = zext <1 x i32> %ld to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i32_to_v1i64:
+define void @local_sextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
+ %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
+ %ext = sext <1 x i32> %ld to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i32_to_v2i64:
+define void @local_zextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
+ %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
+ %ext = zext <2 x i32> %ld to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i32_to_v2i64:
+define void @local_sextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
+ %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
+ %ext = sext <2 x i32> %ld to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i32_to_v4i64:
+define void @local_zextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
+ %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
+ %ext = zext <4 x i32> %ld to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i32_to_v4i64:
+define void @local_sextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
+ %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
+ %ext = sext <4 x i32> %ld to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i32_to_v8i64:
+define void @local_zextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
+ %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
+ %ext = zext <8 x i32> %ld to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i32_to_v8i64:
+define void @local_sextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
+ %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
+ %ext = sext <8 x i32> %ld to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i32_to_v16i64:
+define void @local_sextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
+ %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
+ %ext = sext <16 x i32> %ld to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i32_to_v16i64
+define void @local_zextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
+ %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
+ %ext = zext <16 x i32> %ld to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i32_to_v32i64:
+define void @local_sextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
+ %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
+ %ext = sext <32 x i32> %ld to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i32_to_v32i64:
+define void @local_zextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
+ %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
+ %ext = zext <32 x i32> %ld to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i64.ll b/llvm/test/CodeGen/AMDGPU/load-local-i64.ll
new file mode 100644
index 00000000000..180807df7b9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i64.ll
@@ -0,0 +1,154 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}local_load_i64:
+; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}}
+; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]]
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_i64(i64 addrspace(3)* %out, i64 addrspace(3)* %in) #0 {
+ %ld = load i64, i64 addrspace(3)* %in
+ store i64 %ld, i64 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v2i64:
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v2i64(<2 x i64> addrspace(3)* %out, <2 x i64> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <2 x i64>, <2 x i64> addrspace(3)* %in
+ store <2 x i64> %ld, <2 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v3i64:
+; GCN-DAG: ds_read2_b64
+; GCN-DAG: ds_read_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <3 x i64>, <3 x i64> addrspace(3)* %in
+ store <3 x i64> %ld, <3 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v4i64:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v4i64(<4 x i64> addrspace(3)* %out, <4 x i64> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <4 x i64>, <4 x i64> addrspace(3)* %in
+ store <4 x i64> %ld, <4 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v8i64:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v8i64(<8 x i64> addrspace(3)* %out, <8 x i64> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <8 x i64>, <8 x i64> addrspace(3)* %in
+ store <8 x i64> %ld, <8 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v16i64:
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+; GCN: ds_read2_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v16i64(<16 x i64> addrspace(3)* %out, <16 x i64> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <16 x i64>, <16 x i64> addrspace(3)* %in
+ store <16 x i64> %ld, <16 x i64> addrspace(3)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i8.ll b/llvm/test/CodeGen/AMDGPU/load-local-i8.ll
new file mode 100644
index 00000000000..423fddac3cf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i8.ll
@@ -0,0 +1,562 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}local_load_i8:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_u8
+
+; EG: LDS_UBYTE_READ_RET
+define void @local_load_i8(i8 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+entry:
+ %ld = load i8, i8 addrspace(3)* %in
+ store i8 %ld, i8 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v2i8:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_u16
+
+; EG: LDS_USHORT_READ_RET
+define void @local_load_v2i8(<2 x i8> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <2 x i8>, <2 x i8> addrspace(3)* %in
+ store <2 x i8> %ld, <2 x i8> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v3i8:
+; GCN: ds_read_b32
+
+; EG: LDS_READ_RET
+define void @local_load_v3i8(<3 x i8> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
+ store <3 x i8> %ld, <3 x i8> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v4i8:
+; GCN: ds_read_b32
+
+; EG: LDS_READ_RET
+define void @local_load_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <4 x i8>, <4 x i8> addrspace(3)* %in
+ store <4 x i8> %ld, <4 x i8> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v8i8:
+; GCN: ds_read_b64
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v8i8(<8 x i8> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <8 x i8>, <8 x i8> addrspace(3)* %in
+ store <8 x i8> %ld, <8 x i8> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_load_v16i8:
+; GCN: ds_read2_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}}
+; GCN: ds_write2_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:{{[0-9]+}}], v[{{[0-9]+}}:[[HI]]{{\]}} offset0:1{{$}}
+
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+; EG: LDS_READ_RET
+define void @local_load_v16i8(<16 x i8> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in
+ store <16 x i8> %ld, <16 x i8> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_i8_to_i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_u8
+
+; EG: LDS_UBYTE_READ_RET
+define void @local_zextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+ %a = load i8, i8 addrspace(3)* %in
+ %ext = zext i8 %a to i32
+ store i32 %ext, i32 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i8_to_i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_i8
+
+; EG: LDS_UBYTE_READ_RET
+; EG: BFE_INT
+define void @local_sextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+ %ld = load i8, i8 addrspace(3)* %in
+ %ext = sext i8 %ld to i32
+ store i32 %ext, i32 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i32:
+define void @local_zextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
+ %ext = zext <1 x i8> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i32:
+define void @local_sextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
+ %ext = sext <1 x i8> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i32:
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; EG: LDS_UBYTE_READ_RET
+; EG: LDS_UBYTE_READ_RET
+define void @local_zextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
+ %ext = zext <2 x i8> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_i8
+; GCN: ds_read_i8
+
+; EG-DAG: LDS_UBYTE_READ_RET
+; EG-DAG: LDS_UBYTE_READ_RET
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+define void @local_sextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
+ %ext = sext <2 x i8> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v3i8_to_v3i32:
+; GCN: ds_read_b32
+
+; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
+; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
+; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,
+define void @local_zextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
+ %ext = zext <3 x i8> %ld to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v3i8_to_v3i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_b32
+
+; GCN-DAG: v_bfe_i32
+; GCN-DAG: v_bfe_i32
+; GCN-DAG: v_bfe_i32
+; GCN-DAG: v_bfe_i32
+
+; GCN-DAG: ds_write_b64
+; GCN-DAG: ds_write_b32
+
+define void @local_sextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
+entry:
+ %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
+ %ext = sext <3 x i8> %ld to <3 x i32>
+ store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; EG: LDS_UBYTE_READ_RET
+; EG: LDS_UBYTE_READ_RET
+; EG: LDS_UBYTE_READ_RET
+; EG: LDS_UBYTE_READ_RET
+define void @local_zextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
+ %ext = zext <4 x i8> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i32:
+; GCN-NOT: s_wqm_b64
+; GCN: s_mov_b32 m0
+; GCN: ds_read_i8
+; GCN: ds_read_i8
+; GCN: ds_read_i8
+; GCN: ds_read_i8
+
+; EG-DAG: LDS_UBYTE_READ_RET
+; EG-DAG: LDS_UBYTE_READ_RET
+; EG-DAG: LDS_UBYTE_READ_RET
+; EG-DAG: LDS_UBYTE_READ_RET
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+; EG-DAG: BFE_INT
+define void @local_sextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
+ %ext = sext <4 x i8> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i32:
+define void @local_zextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
+ %ext = zext <8 x i8> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i32:
+define void @local_sextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
+ %ext = sext <8 x i8> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i32:
+define void @local_zextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
+ %ext = zext <16 x i8> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i32:
+define void @local_sextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
+ %ext = sext <16 x i8> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i32:
+define void @local_zextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
+ %ext = zext <32 x i8> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i32:
+define void @local_sextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
+ %ext = sext <32 x i8> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i32:
+define void @local_zextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+ %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
+ %ext = zext <64 x i8> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i32:
+define void @local_sextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+ %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
+ %ext = sext <64 x i8> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_i8_to_i64:
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; GCN-DAG: ds_read_u8 v[[LO:[0-9]+]],
+; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
+define void @local_zextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+ %a = load i8, i8 addrspace(3)* %in
+ %ext = zext i8 %a to i64
+ store i64 %ext, i64 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i8_to_i64:
+; GCN: ds_read_i8 v[[LO:[0-9]+]],
+; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+
+; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @local_sextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+ %a = load i8, i8 addrspace(3)* %in
+ %ext = sext i8 %a to i64
+ store i64 %ext, i64 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i64:
+define void @local_zextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
+ %ext = zext <1 x i8> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i64:
+define void @local_sextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
+ %ext = sext <1 x i8> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i64:
+define void @local_zextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
+ %ext = zext <2 x i8> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i64:
+define void @local_sextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
+ %ext = sext <2 x i8> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i64:
+define void @local_zextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
+ %ext = zext <4 x i8> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i64:
+define void @local_sextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
+ %ext = sext <4 x i8> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i64:
+define void @local_zextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
+ %ext = zext <8 x i8> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i64:
+define void @local_sextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
+ %ext = sext <8 x i8> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i64:
+define void @local_zextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
+ %ext = zext <16 x i8> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i64:
+define void @local_sextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
+ %ext = sext <16 x i8> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i64:
+define void @local_zextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
+ %ext = zext <32 x i8> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i64:
+define void @local_sextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
+ %ext = sext <32 x i8> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i64:
+; define void @local_zextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
+; %ext = zext <64 x i8> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i64:
+; define void @local_sextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
+; %ext = sext <64 x i8> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
+; ret void
+; }
+
+; FUNC-LABEL: {{^}}local_zextload_i8_to_i16:
+; GCN: ds_read_u8 v[[VAL:[0-9]+]],
+; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]]
+define void @local_zextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+ %a = load i8, i8 addrspace(3)* %in
+ %ext = zext i8 %a to i16
+ store i16 %ext, i16 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_i8_to_i16:
+; GCN: ds_read_i8 v[[VAL:[0-9]+]],
+; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]]
+define void @local_sextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
+ %a = load i8, i8 addrspace(3)* %in
+ %ext = sext i8 %a to i16
+ store i16 %ext, i16 addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i16:
+define void @local_zextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
+ %ext = zext <1 x i8> %load to <1 x i16>
+ store <1 x i16> %ext, <1 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i16:
+define void @local_sextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
+ %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
+ %ext = sext <1 x i8> %load to <1 x i16>
+ store <1 x i16> %ext, <1 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i16:
+define void @local_zextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
+ %ext = zext <2 x i8> %load to <2 x i16>
+ store <2 x i16> %ext, <2 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i16:
+define void @local_sextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
+ %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
+ %ext = sext <2 x i8> %load to <2 x i16>
+ store <2 x i16> %ext, <2 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i16:
+define void @local_zextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
+ %ext = zext <4 x i8> %load to <4 x i16>
+ store <4 x i16> %ext, <4 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16:
+define void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
+ %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
+ %ext = sext <4 x i8> %load to <4 x i16>
+ store <4 x i16> %ext, <4 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i16:
+define void @local_zextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
+ %ext = zext <8 x i8> %load to <8 x i16>
+ store <8 x i16> %ext, <8 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i16:
+define void @local_sextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
+ %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
+ %ext = sext <8 x i8> %load to <8 x i16>
+ store <8 x i16> %ext, <8 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i16:
+define void @local_zextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
+ %ext = zext <16 x i8> %load to <16 x i16>
+ store <16 x i16> %ext, <16 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i16:
+define void @local_sextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
+ %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
+ %ext = sext <16 x i8> %load to <16 x i16>
+ store <16 x i16> %ext, <16 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i16:
+define void @local_zextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
+ %ext = zext <32 x i8> %load to <32 x i16>
+ store <32 x i16> %ext, <32 x i16> addrspace(3)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i16:
+define void @local_sextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
+ %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
+ %ext = sext <32 x i8> %load to <32 x i16>
+ store <32 x i16> %ext, <32 x i16> addrspace(3)* %out
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i16:
+; define void @local_zextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
+; %ext = zext <64 x i8> %load to <64 x i16>
+; store <64 x i16> %ext, <64 x i16> addrspace(3)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i16:
+; define void @local_sextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
+; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
+; %ext = sext <64 x i8> %load to <64 x i16>
+; store <64 x i16> %ext, <64 x i16> addrspace(3)* %out
+; ret void
+; }
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/load.ll b/llvm/test/CodeGen/AMDGPU/load.ll
deleted file mode 100644
index d2d408f06d6..00000000000
--- a/llvm/test/CodeGen/AMDGPU/load.ll
+++ /dev/null
@@ -1,750 +0,0 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s
-; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=CI-HSA -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-
-;===------------------------------------------------------------------------===;
-; GLOBAL ADDRESS SPACE
-;===------------------------------------------------------------------------===;
-
-; Load an i8 value from the global address space.
-; FUNC-LABEL: {{^}}load_i8:
-; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-
-; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}},
-; CI-HSA: flat_load_ubyte
-define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %1 = load i8, i8 addrspace(1)* %in
- %2 = zext i8 %1 to i32
- store i32 %2, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_i8_sext:
-; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
-; R600: 8
-; SI-NOHSA: buffer_load_sbyte
-; CI-HSA: flat_load_sbyte
-define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
-entry:
- %0 = load i8, i8 addrspace(1)* %in
- %1 = sext i8 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i8:
-; R600: VTX_READ_8
-; R600: VTX_READ_8
-; SI-NOHSA: buffer_load_ubyte
-; SI-NOHSA: buffer_load_ubyte
-; CI-HSA: flat_load_ubyte
-; CI-HSA: flat_load_ubyte
-define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
-entry:
- %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
- %1 = zext <2 x i8> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i8_sext:
-; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
-; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
-; R600-DAG: 8
-; R600-DAG: 8
-
-; SI-NOHSA: buffer_load_sbyte
-; SI-NOHSA: buffer_load_sbyte
-; CI-HSA: flat_load_sbyte
-; CI-HSA: flat_load_sbyte
-define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
-entry:
- %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
- %1 = sext <2 x i8> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i8:
-; R600: VTX_READ_8
-; R600: VTX_READ_8
-; R600: VTX_READ_8
-; R600: VTX_READ_8
-; SI-NOHSA: buffer_load_ubyte
-; SI-NOHSA: buffer_load_ubyte
-; SI-NOHSA: buffer_load_ubyte
-; SI-NOHSA: buffer_load_ubyte
-; CI-HSA: flat_load_ubyte
-; CI-HSA: flat_load_ubyte
-; CI-HSA: flat_load_ubyte
-; CI-HSA: flat_load_ubyte
-define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
-entry:
- %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
- %1 = zext <4 x i8> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i8_sext:
-; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
-; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
-; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
-; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
-; R600-DAG: 8
-; R600-DAG: 8
-; R600-DAG: 8
-; R600-DAG: 8
-; SI-NOHSA: buffer_load_sbyte
-; SI-NOHSA: buffer_load_sbyte
-; SI-NOHSA: buffer_load_sbyte
-; SI-NOHSA: buffer_load_sbyte
-; CI-HSA: flat_load_sbyte
-; CI-HSA: flat_load_sbyte
-; CI-HSA: flat_load_sbyte
-; CI-HSA: flat_load_sbyte
-define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
-entry:
- %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
- %1 = sext <4 x i8> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; Load an i16 value from the global address space.
-; FUNC-LABEL: {{^}}load_i16:
-; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-NOHSA: buffer_load_ushort
-; CI-HSA: flat_load_ushort
-define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
-entry:
- %0 = load i16 , i16 addrspace(1)* %in
- %1 = zext i16 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_i16_sext:
-; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
-; R600: 16
-; SI-NOHSA: buffer_load_sshort
-; CI-HSA: flat_load_sshort
-define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
-entry:
- %0 = load i16, i16 addrspace(1)* %in
- %1 = sext i16 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i16:
-; R600: VTX_READ_16
-; R600: VTX_READ_16
-; SI-NOHSA: buffer_load_ushort
-; SI-NOHSA: buffer_load_ushort
-; CI-HSA: flat_load_ushort
-; CI-HSA: flat_load_ushort
-define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
-entry:
- %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
- %1 = zext <2 x i16> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i16_sext:
-; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
-; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
-; R600-DAG: 16
-; R600-DAG: 16
-; SI-NOHSA: buffer_load_sshort
-; SI-NOHSA: buffer_load_sshort
-; CI-HSA: flat_load_sshort
-; CI-HSA: flat_load_sshort
-define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
-entry:
- %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
- %1 = sext <2 x i16> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}global_load_v3i16:
-; SI-NOHSA: buffer_load_dwordx2 v
-; SI-HSA: flat_load_dwordx2 v
-
-; R600-DAG: VTX_READ_32
-; R600-DAG: VTX_READ_16
-define void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
-entry:
- %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
- store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i16:
-; R600: VTX_READ_16
-; R600: VTX_READ_16
-; R600: VTX_READ_16
-; R600: VTX_READ_16
-; SI-NOHSA: buffer_load_ushort
-; SI-NOHSA: buffer_load_ushort
-; SI-NOHSA: buffer_load_ushort
-; SI-NOHSA: buffer_load_ushort
-; CI-HSA: flat_load_ushort
-; CI-HSA: flat_load_ushort
-; CI-HSA: flat_load_ushort
-; CI-HSA: flat_load_ushort
-define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
-entry:
- %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
- %1 = zext <4 x i16> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i16_sext:
-; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
-; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
-; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
-; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
-; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
-; R600-DAG: 16
-; R600-DAG: 16
-; R600-DAG: 16
-; R600-DAG: 16
-; SI-NOHSA: buffer_load_sshort
-; SI-NOHSA: buffer_load_sshort
-; SI-NOHSA: buffer_load_sshort
-; SI-NOHSA: buffer_load_sshort
-; CI-HSA: flat_load_sshort
-; CI-HSA: flat_load_sshort
-; CI-HSA: flat_load_sshort
-; CI-HSA: flat_load_sshort
-define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
-entry:
- %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
- %1 = sext <4 x i16> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; load an i32 value from the global address space.
-; FUNC-LABEL: {{^}}load_i32:
-; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-
-; SI-NOHSA: buffer_load_dword v{{[0-9]+}}
-; CI-HSA: flat_load_dword
-define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-entry:
- %0 = load i32, i32 addrspace(1)* %in
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; load a f32 value from the global address space.
-; FUNC-LABEL: {{^}}load_f32:
-; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-
-; SI-NOHSA: buffer_load_dword v{{[0-9]+}}
-; CI-HSA: flat_load_dword
-define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-entry:
- %0 = load float, float addrspace(1)* %in
- store float %0, float addrspace(1)* %out
- ret void
-}
-
-; load a v2f32 value from the global address space
-; FUNC-LABEL: {{^}}load_v2f32:
-; R600: MEM_RAT
-; R600: VTX_READ_64
-; SI-NOHSA: buffer_load_dwordx2
-; CI-HSA: flat_load_dwordx2
-define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
-entry:
- %0 = load <2 x float>, <2 x float> addrspace(1)* %in
- store <2 x float> %0, <2 x float> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_i64:
-; R600: VTX_READ_64
-; SI-NOHSA: buffer_load_dwordx2
-; CI-HSA: flat_load_dwordx2
-define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
-entry:
- %0 = load i64, i64 addrspace(1)* %in
- store i64 %0, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_i64_sext:
-; R600: MEM_RAT
-; R600: MEM_RAT
-; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
-; R600: 31
-; SI-NOHSA: buffer_load_dword
-; CI-HSA: flat_load_dword
-
-define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
-entry:
- %0 = load i32, i32 addrspace(1)* %in
- %1 = sext i32 %0 to i64
- store i64 %1, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_i64_zext:
-; R600: MEM_RAT
-; R600: MEM_RAT
-define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
-entry:
- %0 = load i32, i32 addrspace(1)* %in
- %1 = zext i32 %0 to i64
- store i64 %1, i64 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v8i32:
-; R600: VTX_READ_128
-; R600: VTX_READ_128
-
-; SI-NOHSA: buffer_load_dwordx4
-; SI-NOHSA: buffer_load_dwordx4
-; CI-HSA: flat_load_dwordx4
-; CI-HSA: flat_load_dwordx4
-define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
-entry:
- %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in
- store <8 x i32> %0, <8 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v16i32:
-; R600: VTX_READ_128
-; R600: VTX_READ_128
-; R600: VTX_READ_128
-; R600: VTX_READ_128
-
-; SI-NOHSA: buffer_load_dwordx4
-; SI-NOHSA: buffer_load_dwordx4
-; SI-NOHSA: buffer_load_dwordx4
-; SI-NOHSA: buffer_load_dwordx4
-; CI-HSA: flat_load_dwordx4
-; CI-HSA: flat_load_dwordx4
-; CI-HSA: flat_load_dwordx4
-; CI-HSA: flat_load_dwordx4
-define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
-entry:
- %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in
- store <16 x i32> %0, <16 x i32> addrspace(1)* %out
- ret void
-}
-
-;===------------------------------------------------------------------------===;
-; CONSTANT ADDRESS SPACE
-;===------------------------------------------------------------------------===;
-
-; Load a sign-extended i8 value
-; FUNC-LABEL: {{^}}load_const_i8_sext:
-; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
-; R600: 8
-; SI-NOHSA: buffer_load_sbyte v{{[0-9]+}},
-; CI-HSA: flat_load_sbyte v{{[0-9]+}},
-define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
-entry:
- %0 = load i8, i8 addrspace(2)* %in
- %1 = sext i8 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; Load an aligned i8 value
-; FUNC-LABEL: {{^}}load_const_i8_aligned:
-; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}},
-; CI-HSA: flat_load_ubyte v{{[0-9]+}},
-define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
-entry:
- %0 = load i8, i8 addrspace(2)* %in
- %1 = zext i8 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; Load an un-aligned i8 value
-; FUNC-LABEL: {{^}}load_const_i8_unaligned:
-; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}},
-; CI-HSA: flat_load_ubyte v{{[0-9]+}},
-define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
-entry:
- %0 = getelementptr i8, i8 addrspace(2)* %in, i32 1
- %1 = load i8, i8 addrspace(2)* %0
- %2 = zext i8 %1 to i32
- store i32 %2, i32 addrspace(1)* %out
- ret void
-}
-
-; Load a sign-extended i16 value
-; FUNC-LABEL: {{^}}load_const_i16_sext:
-; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
-; R600: 16
-; SI-NOHSA: buffer_load_sshort
-; CI-HSA: flat_load_sshort
-define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
-entry:
- %0 = load i16, i16 addrspace(2)* %in
- %1 = sext i16 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; Load an aligned i16 value
-; FUNC-LABEL: {{^}}load_const_i16_aligned:
-; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-NOHSA: buffer_load_ushort
-; CI-HSA: flat_load_ushort
-define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
-entry:
- %0 = load i16, i16 addrspace(2)* %in
- %1 = zext i16 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; Load an un-aligned i16 value
-; FUNC-LABEL: {{^}}load_const_i16_unaligned:
-; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-NOHSA: buffer_load_ushort
-; CI-HSA: flat_load_ushort
-define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
-entry:
- %0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
- %1 = load i16, i16 addrspace(2)* %0
- %2 = zext i16 %1 to i32
- store i32 %2, i32 addrspace(1)* %out
- ret void
-}
-
-; Load an i32 value from the constant address space.
-; FUNC-LABEL: {{^}}load_const_addrspace_i32:
-; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-
-; SI: s_load_dword s{{[0-9]+}}
-define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
-entry:
- %0 = load i32, i32 addrspace(2)* %in
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; Load a f32 value from the constant address space.
-; FUNC-LABEL: {{^}}load_const_addrspace_f32:
-; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-
-; SI: s_load_dword s{{[0-9]+}}
-define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
- %1 = load float, float addrspace(2)* %in
- store float %1, float addrspace(1)* %out
- ret void
-}
-
-;===------------------------------------------------------------------------===;
-; LOCAL ADDRESS SPACE
-;===------------------------------------------------------------------------===;
-
-; Load an i8 value from the local address space.
-; FUNC-LABEL: {{^}}load_i8_local:
-; R600: LDS_UBYTE_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u8
-define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
- %1 = load i8, i8 addrspace(3)* %in
- %2 = zext i8 %1 to i32
- store i32 %2, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_i8_sext_local:
-; R600: LDS_UBYTE_READ_RET
-; R600: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i8
-define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
-entry:
- %0 = load i8, i8 addrspace(3)* %in
- %1 = sext i8 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i8_local:
-; R600: LDS_UBYTE_READ_RET
-; R600: LDS_UBYTE_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u8
-; SI: ds_read_u8
-define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
-entry:
- %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
- %1 = zext <2 x i8> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i8_sext_local:
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i8
-; SI: ds_read_i8
-define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
-entry:
- %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
- %1 = sext <2 x i8> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i8_local:
-; R600: LDS_UBYTE_READ_RET
-; R600: LDS_UBYTE_READ_RET
-; R600: LDS_UBYTE_READ_RET
-; R600: LDS_UBYTE_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
-entry:
- %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
- %1 = zext <4 x i8> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i8_sext_local:
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i8
-; SI: ds_read_i8
-; SI: ds_read_i8
-; SI: ds_read_i8
-define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
-entry:
- %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
- %1 = sext <4 x i8> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; Load an i16 value from the local address space.
-; FUNC-LABEL: {{^}}load_i16_local:
-; R600: LDS_USHORT_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u16
-define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
-entry:
- %0 = load i16 , i16 addrspace(3)* %in
- %1 = zext i16 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_i16_sext_local:
-; R600: LDS_USHORT_READ_RET
-; R600: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i16
-define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
-entry:
- %0 = load i16, i16 addrspace(3)* %in
- %1 = sext i16 %0 to i32
- store i32 %1, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i16_local:
-; R600: LDS_USHORT_READ_RET
-; R600: LDS_USHORT_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u16
-; SI: ds_read_u16
-define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
-entry:
- %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
- %1 = zext <2 x i16> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v2i16_sext_local:
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i16
-; SI: ds_read_i16
-define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
-entry:
- %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
- %1 = sext <2 x i16> %0 to <2 x i32>
- store <2 x i32> %1, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i16_local:
-; R600: LDS_USHORT_READ_RET
-; R600: LDS_USHORT_READ_RET
-; R600: LDS_USHORT_READ_RET
-; R600: LDS_USHORT_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
-entry:
- %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
- %1 = zext <4 x i16> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}load_v4i16_sext_local:
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; R600-DAG: BFE_INT
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_i16
-; SI: ds_read_i16
-; SI: ds_read_i16
-; SI: ds_read_i16
-define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
-entry:
- %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
- %1 = sext <4 x i16> %0 to <4 x i32>
- store <4 x i32> %1, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; load an i32 value from the local address space.
-; FUNC-LABEL: {{^}}load_i32_local:
-; R600: LDS_READ_RET
-; SI-NOT: s_wqm_b64
-; SI: s_mov_b32 m0
-; SI: ds_read_b32
-define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
-entry:
- %0 = load i32, i32 addrspace(3)* %in
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; load a f32 value from the local address space.
-; FUNC-LABEL: {{^}}load_f32_local:
-; R600: LDS_READ_RET
-; SI: s_mov_b32 m0
-; SI: ds_read_b32
-define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
-entry:
- %0 = load float, float addrspace(3)* %in
- store float %0, float addrspace(1)* %out
- ret void
-}
-
-; load a v2f32 value from the local address space
-; FUNC-LABEL: {{^}}load_v2f32_local:
-; R600: LDS_READ_RET
-; R600: LDS_READ_RET
-; SI: s_mov_b32 m0
-; SI: ds_read_b64
-define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
-entry:
- %0 = load <2 x float>, <2 x float> addrspace(3)* %in
- store <2 x float> %0, <2 x float> addrspace(1)* %out
- ret void
-}
-
-; Test loading a i32 and v2i32 value from the same base pointer.
-; FUNC-LABEL: {{^}}load_i32_v2i32_local:
-; R600: LDS_READ_RET
-; R600: LDS_READ_RET
-; R600: LDS_READ_RET
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_read2_b32
-define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
- %scalar = load i32, i32 addrspace(3)* %in
- %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
- %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
- %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
- %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
- %vec = add <2 x i32> %vec0, %vec1
- store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-
-@lds = addrspace(3) global [512 x i32] undef, align 4
-
-; On SI we need to make sure that the base offset is a register and not
-; an immediate.
-; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
-; SI: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
-; SI: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4
-; R600: LDS_READ_RET
-define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
-entry:
- %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
- %tmp1 = load i32, i32 addrspace(3)* %tmp0
- %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
- store i32 %tmp1, i32 addrspace(1)* %tmp2
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/load.vec.ll b/llvm/test/CodeGen/AMDGPU/load.vec.ll
deleted file mode 100644
index 02f883cd8e9..00000000000
--- a/llvm/test/CodeGen/AMDGPU/load.vec.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
-
-; load a v2i32 value from the global address space.
-; EG: {{^}}load_v2i32:
-; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0
-; SI: {{^}}load_v2i32:
-; SI: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
-define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
- store <2 x i32> %a, <2 x i32> addrspace(1)* %out
- ret void
-}
-
-; load a v4i32 value from the global address space.
-; EG: {{^}}load_v4i32:
-; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0
-; SI: {{^}}load_v4i32:
-; SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}]
-define void @load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
- store <4 x i32> %a, <4 x i32> addrspace(1)* %out
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/load64.ll b/llvm/test/CodeGen/AMDGPU/load64.ll
deleted file mode 100644
index 74beabdc007..00000000000
--- a/llvm/test/CodeGen/AMDGPU/load64.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-; load a f64 value from the global address space.
-; CHECK-LABEL: {{^}}load_f64:
-; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
-; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
-define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
- %1 = load double, double addrspace(1)* %in
- store double %1, double addrspace(1)* %out
- ret void
-}
-
-; CHECK-LABEL: {{^}}load_i64:
-; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
-; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
-define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %tmp = load i64, i64 addrspace(1)* %in
- store i64 %tmp, i64 addrspace(1)* %out, align 8
- ret void
-}
-
-; Load a f64 value from the constant address space.
-; CHECK-LABEL: {{^}}load_const_addrspace_f64:
-; CHECK: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
-; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
-define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) {
- %1 = load double, double addrspace(2)* %in
- store double %1, double addrspace(1)* %out
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/local-memory.ll b/llvm/test/CodeGen/AMDGPU/local-memory.ll
index 6ccfe737d27..8d48f594b23 100644
--- a/llvm/test/CodeGen/AMDGPU/local-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-memory.ll
@@ -43,6 +43,41 @@ entry:
ret void
}
+@lds = addrspace(3) global [512 x i32] undef, align 4
+
+; On SI we need to make sure that the base offset is a register and not
+; an immediate.
+; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
+; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
+; GCN: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4
+; R600: LDS_READ_RET
+define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
+entry:
+ %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
+ %tmp1 = load i32, i32 addrspace(3)* %tmp0
+ %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ store i32 %tmp1, i32 addrspace(1)* %tmp2
+ ret void
+}
+
+; Test loading a i32 and v2i32 value from the same base pointer.
+; FUNC-LABEL: {{^}}load_i32_v2i32_local:
+; R600: LDS_READ_RET
+; R600: LDS_READ_RET
+; R600: LDS_READ_RET
+; GCN-DAG: ds_read_b32
+; GCN-DAG: ds_read2_b32
+define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
+ %scalar = load i32, i32 addrspace(3)* %in
+ %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
+ %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
+ %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
+ %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
+ %vec = add <2 x i32> %vec0, %vec1
+ store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
declare i32 @llvm.r600.read.tidig.x() #0
declare void @llvm.AMDGPU.barrier.local()
diff --git a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
index 4a77e003ed5..bfbe1c00a57 100644
--- a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll
@@ -1,300 +1,387 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
-
-; SI-LABEL: {{^}}unaligned_load_store_i16_local:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: s_endpgm
-define void @unaligned_load_store_i16_local(i16 addrspace(3)* %p, i16 addrspace(3)* %r) nounwind {
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}local_unaligned_load_store_i16:
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: s_endpgm
+define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(3)* %r) #0 {
%v = load i16, i16 addrspace(3)* %p, align 1
store i16 %v, i16 addrspace(3)* %r, align 1
ret void
}
-; SI-LABEL: {{^}}unaligned_load_store_i16_global:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: s_endpgm
-define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) nounwind {
+; FUNC-LABEL: {{^}}unaligned_load_store_i16_global:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 {
%v = load i16, i16 addrspace(1)* %p, align 1
store i16 %v, i16 addrspace(1)* %r, align 1
ret void
}
-; SI-LABEL: {{^}}unaligned_load_store_i32_local:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: s_endpgm
-define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
+; FUNC-LABEL: {{^}}local_unaligned_load_store_i32:
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: s_endpgm
+define void @local_unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
%v = load i32, i32 addrspace(3)* %p, align 1
store i32 %v, i32 addrspace(3)* %r, align 1
ret void
}
-; SI-LABEL: {{^}}unaligned_load_store_i32_global:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
+; FUNC-LABEL: {{^}}global_unaligned_load_store_i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+define void @global_unaligned_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
%v = load i32, i32 addrspace(1)* %p, align 1
store i32 %v, i32 addrspace(1)* %r, align 1
ret void
}
-; SI-LABEL: {{^}}align2_load_store_i32_global:
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_store_short
-; SI: buffer_store_short
-define void @align2_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
+; FUNC-LABEL: {{^}}global_align2_load_store_i32:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_load_ushort
+; GCN-NOHSA: buffer_store_short
+; GCN-NOHSA: buffer_store_short
+
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_load_ushort
+; GCN-HSA: flat_store_short
+; GCN-HSA: flat_store_short
+define void @global_align2_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
%v = load i32, i32 addrspace(1)* %p, align 2
store i32 %v, i32 addrspace(1)* %r, align 2
ret void
}
-; SI-LABEL: {{^}}align2_load_store_i32_local:
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_write_b16
-; SI: ds_write_b16
-define void @align2_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
+; FUNC-LABEL: {{^}}local_align2_load_store_i32:
+; GCN: ds_read_u16
+; GCN: ds_read_u16
+; GCN: ds_write_b16
+; GCN: ds_write_b16
+define void @local_align2_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
%v = load i32, i32 addrspace(3)* %p, align 2
store i32 %v, i32 addrspace(3)* %r, align 2
ret void
}
; FIXME: Unnecessary packing and unpacking of bytes.
-; SI-LABEL: {{^}}unaligned_load_store_i64_local:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-; SI: ds_write_b8
-; SI: s_endpgm
-define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
+; FUNC-LABEL: {{^}}local_unaligned_load_store_i64:
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+; GCN: ds_write_b8
+; GCN: s_endpgm
+define void @local_unaligned_load_store_i64(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
%v = load i64, i64 addrspace(3)* %p, align 1
store i64 %v, i64 addrspace(3)* %r, align 1
ret void
}
-; SI-LABEL: {{^}}unaligned_load_store_v2i32_local:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-
-; SI: ds_write_b8
-; XSI-NOT: v_or_b32
-; XSI-NOT: v_lshl
-; SI: ds_write_b8
-; SI: s_endpgm
-define void @unaligned_load_store_v2i32_local(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) {
+; FUNC-LABEL: {{^}}local_unaligned_load_store_v2i32:
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+
+; GCN: ds_write_b8
+; XGCN-NOT: v_or_b32
+; XGCN-NOT: v_lshl
+; GCN: ds_write_b8
+; GCN: s_endpgm
+define void @local_unaligned_load_store_v2i32(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) {
%v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1
ret void
}
-; SI-LABEL: {{^}}unaligned_load_store_i64_global:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-
-; XSI-NOT: v_or_
-; XSI-NOT: v_lshl
-
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-; SI: buffer_store_byte
-define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
+; FUNC-LABEL: {{^}}unaligned_load_store_i64_global:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+
+; XGCN-NOT: v_or_
+; XGCN-NOT: v_lshl
+
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
%v = load i64, i64 addrspace(1)* %p, align 1
store i64 %v, i64 addrspace(1)* %r, align 1
ret void
}
-; SI-LABEL: {{^}}unaligned_load_store_v4i32_local:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: ds_write_b8
-; SI: s_endpgm
-define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
+; FUNC-LABEL: {{^}}local_unaligned_load_store_v4i32:
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: s_endpgm
+define void @local_unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) #0 {
%v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1
store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
ret void
}
-; FIXME: We mark v4i32 as custom, so misaligned loads are never expanded.
-; FIXME-SI-LABEL: {{^}}unaligned_load_store_v4i32_global
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-; FIXME-SI: buffer_load_ubyte
-define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind {
+; FUNC-LABEL: {{^}}global_unaligned_load_store_v4i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+; GCN-NOHSA: buffer_store_byte
+
+
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+; GCN-HSA: flat_store_byte
+define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) #0 {
%v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
ret void
}
-; SI-LABEL: {{^}}load_lds_i64_align_4:
-; SI: ds_read2_b32
-; SI: s_endpgm
-define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+; FUNC-LABEL: {{^}}local_load_i64_align_4:
+; GCN: ds_read2_b32
+define void @local_load_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
%val = load i64, i64 addrspace(3)* %in, align 4
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
-; SI-LABEL: {{^}}load_lds_i64_align_4_with_offset
-; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
-; SI: s_endpgm
-define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+; FUNC-LABEL: {{^}}local_load_i64_align_4_with_offset
+; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
+define void @local_load_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
%ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
%val = load i64, i64 addrspace(3)* %ptr, align 4
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
-; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
+; FUNC-LABEL: {{^}}local_load_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
-; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
-; SI: s_endpgm
-define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
+; GCN: s_endpgm
+define void @local_load_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
%ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
%ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
%ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
@@ -303,49 +390,95 @@ define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture
ret void
}
-; SI-LABEL: {{^}}load_lds_i64_align_1:
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
-
-define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+; FUNC-LABEL: {{^}}local_load_i64_align_1:
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: ds_read_u8
+; GCN: store_dwordx2
+define void @local_load_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
%val = load i64, i64 addrspace(3)* %in, align 1
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
-; SI-LABEL: {{^}}store_lds_i64_align_4:
-; SI: ds_write2_b32
-; SI: s_endpgm
-define void @store_lds_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
+; FUNC-LABEL: {{^}}local_store_i64_align_4:
+; GCN: ds_write2_b32
+define void @local_store_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
store i64 %val, i64 addrspace(3)* %out, align 4
ret void
}
-; SI-LABEL: {{^}}store_lds_i64_align_4_with_offset
-; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
-; SI: s_endpgm
-define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
+; FUNC-LABEL: {{^}}local_store_i64_align_4_with_offset
+; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
+; GCN: s_endpgm
+define void @local_store_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
%ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4
store i64 0, i64 addrspace(3)* %ptr, align 4
ret void
}
-; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
+; FUNC-LABEL: {{^}}local_store_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
-; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
-; SI: s_endpgm
-define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
+; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
+; GCN: s_endpgm
+define void @local_store_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
%ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
%ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
%ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
store i64 0, i64 addrspace(3)* %out, align 4
ret void
}
+
+; FUNC-LABEL: {{^}}constant_load_unaligned_i16:
+; GCN-NOHSA: buffer_load_ushort
+; GCN-HSA: flat_load_ushort
+
+; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+define void @constant_load_unaligned_i16(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
+entry:
+ %tmp0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
+ %tmp1 = load i16, i16 addrspace(2)* %tmp0
+ %tmp2 = zext i16 %tmp1 to i32
+ store i32 %tmp2, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_unaligned_i32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+define void @constant_load_unaligned_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+entry:
+ %tmp0 = load i32, i32 addrspace(2)* %in, align 1
+ store i32 %tmp0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}constant_load_unaligned_f32:
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+; GCN-NOHSA: buffer_load_ubyte
+
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+; GCN-HSA: flat_load_ubyte
+define void @constant_load_unaligned_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
+ %tmp1 = load float, float addrspace(2)* %in, align 1
+ store float %tmp1, float addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
OpenPOWER on IntegriCloud