summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll 38
1 files changed, 37 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
index 320d008c220..696b33e75fe 100644
--- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}chain_hi_to_lo_private:
; GCN: buffer_load_ushort [[DST:v[0-9]+]], off, [[RSRC:s\[[0-9]+:[0-9]+\]]], [[SOFF:s[0-9]+]] offset:2
@@ -139,3 +139,39 @@ bb:
ret <2 x half> %result
}
+
+; Make sure we don't lose any of the private stores.
+; GCN-LABEL: {{^}}vload2_private:
+; GCN: buffer_store_short v{{[0-9]+}}, off, s[0:3], s{{[0-9]+}} offset:4
+; GCN: buffer_store_short_d16_hi v{{[0-9]+}}, off, s[0:3], s{{[0-9]+}} offset:6
+; GCN: buffer_store_short v{{[0-9]+}}, off, s[0:3], s{{[0-9]+}} offset:8
+
+; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s{{[0-9]+}} offset:4
+; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s{{[0-9]+}} offset:6
+; GCN: buffer_load_short_d16_hi v{{[0-9]+}}, off, s[0:3], s{{[0-9]+}} offset:8
+define amdgpu_kernel void @vload2_private(i16 addrspace(1)* nocapture readonly %in, <2 x i16> addrspace(1)* nocapture %out) #0 { ; copies in[0..2] into a 3 x i16 addrspace(5) alloca, then writes two overlapping <2 x i16> loads of it to out[0] and out[1]
+entry:
+ %loc = alloca [3 x i16], align 2, addrspace(5) ; three-element i16 array, 2-byte aligned
+ %loc.0.sroa_cast1 = bitcast [3 x i16] addrspace(5)* %loc to i8 addrspace(5)* ; result is not used anywhere in this function
+ %tmp = load i16, i16 addrspace(1)* %in, align 2 ; tmp = in[0]
+ %loc.0.sroa_idx = getelementptr inbounds [3 x i16], [3 x i16] addrspace(5)* %loc, i32 0, i32 0 ; &loc[0]
+ store volatile i16 %tmp, i16 addrspace(5)* %loc.0.sroa_idx ; loc[0] = in[0]; first of the three stores the checks above require
+ %arrayidx.1 = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 1 ; &in[1]
+ %tmp1 = load i16, i16 addrspace(1)* %arrayidx.1, align 2 ; tmp1 = in[1]
+ %loc.2.sroa_idx3 = getelementptr inbounds [3 x i16], [3 x i16] addrspace(5)* %loc, i32 0, i32 1 ; &loc[1]
+ store volatile i16 %tmp1, i16 addrspace(5)* %loc.2.sroa_idx3 ; loc[1] = in[1]; second required store
+ %arrayidx.2 = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 2 ; &in[2]
+ %tmp2 = load i16, i16 addrspace(1)* %arrayidx.2, align 2 ; tmp2 = in[2]
+ %loc.4.sroa_idx = getelementptr inbounds [3 x i16], [3 x i16] addrspace(5)* %loc, i32 0, i32 2 ; &loc[2]
+ store volatile i16 %tmp2, i16 addrspace(5)* %loc.4.sroa_idx ; loc[2] = in[2]; third required store
+ %loc.0.sroa_cast = bitcast [3 x i16] addrspace(5)* %loc to <2 x i16> addrspace(5)* ; view the start of loc as a <2 x i16>
+ %loc.0. = load <2 x i16>, <2 x i16> addrspace(5)* %loc.0.sroa_cast, align 2 ; reads loc[0] and loc[1] as one vector
+ store <2 x i16> %loc.0., <2 x i16> addrspace(1)* %out, align 4 ; out[0] = {loc[0], loc[1]}
+ %loc.2.sroa_idx = getelementptr inbounds [3 x i16], [3 x i16] addrspace(5)* %loc, i32 0, i32 1 ; &loc[1] again, base of the second vector load
+ %loc.2.sroa_cast = bitcast i16 addrspace(5)* %loc.2.sroa_idx to <2 x i16> addrspace(5)* ; view loc[1..2] as a <2 x i16>
+ %loc.2. = load <2 x i16>, <2 x i16> addrspace(5)* %loc.2.sroa_cast, align 2 ; reads loc[1] and loc[2]; overlaps the previous vector load at loc[1]
+ %arrayidx6 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 1 ; &out[1]
+ store <2 x i16> %loc.2., <2 x i16> addrspace(1)* %arrayidx6, align 4 ; out[1] = {loc[1], loc[2]}
+ %loc.0.sroa_cast2 = bitcast [3 x i16] addrspace(5)* %loc to i8 addrspace(5)* ; result is not used anywhere in this function
+ ret void
+}
OpenPOWER on IntegriCloud