summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
diff options
context:
space:
mode:
author Matt Arsenault <Matthew.Arsenault@amd.com> 2018-06-20 19:45:48 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com> 2018-06-20 19:45:48 +0000
commit 5a4ec8127f7743f83dd17b2ef384958d54c4c95c (patch)
tree abfdad8699b0df39e75f7231b8f82ab9a18943e4 /llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
parent 3d06668ad40e68abdc971497835b1e3f19aecc41 (diff)
download bcm5719-llvm-5a4ec8127f7743f83dd17b2ef384958d54c4c95c.tar.gz
bcm5719-llvm-5a4ec8127f7743f83dd17b2ef384958d54c4c95c.zip
AMDGPU: Fix scalar_to_vector for v4i16/v4f16
llvm-svn: 335161
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll 33
1 file changed, 33 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
index 0f09fa17423..ff634c609e8 100644
--- a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
@@ -29,6 +29,39 @@ define amdgpu_kernel void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out,
ret void
}
+; Kernel that loads a <2 x i8>, widens it to <8 x i8> through two
+; shufflevectors and stores the result. Only lane 0 of the stored vector
+; comes from the load; lanes 1-7 use mask index 9, which selects from the
+; undef operand of the second shuffle.
+; NOTE(review): presumably this is the pattern that reaches instruction
+; selection as a SCALAR_TO_VECTOR producing v4i16 - confirm with a DAG dump.
+; The instruction checks below use the VI prefix only; the label check uses
+; the GCN prefix.
+; GCN-LABEL: {{^}}scalar_to_vector_v4i16:
+; VI: v_lshlrev_b16_e32
+; VI: v_lshlrev_b16_e32
+; VI: v_or_b32_e32
+; VI: v_lshlrev_b32
+; VI: v_or_b32_sdwa
+; VI: v_or_b32_sdwa
+define amdgpu_kernel void @scalar_to_vector_v4i16() {
+bb:
+ ; Load two bytes from an undef addrspace(1) pointer.
+ %tmp = load <2 x i8>, <2 x i8> addrspace(1)* undef, align 1
+ ; Widen to 8 lanes. Mask indices 0 and 1 both refer to %tmp, so the
+ ; zeroinitializer operand is never selected.
+ %tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ; Keep lane 0 of %tmp1. Index 9 refers to element 1 of the undef operand,
+ ; so lanes 1-7 of %tmp2 are undefined.
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
+ ; Store the 8-byte vector to an undef addrspace(1) pointer.
+ store <8 x i8> %tmp2, <8 x i8> addrspace(1)* undef, align 8
+ ret void
+}
+
+; Kernel that loads a half, bitcasts it to <2 x i8>, widens that to
+; <8 x i8> through two shufflevectors and stores the result. Only lane 0 of
+; the stored vector comes from the loaded value; lanes 1-7 use mask index 9,
+; which selects from the undef operand of the second shuffle.
+; NOTE(review): presumably this is the pattern that reaches instruction
+; selection as a SCALAR_TO_VECTOR producing v4f16 - confirm with a DAG dump.
+; The instruction checks below use the VI prefix only; the label check uses
+; the GCN prefix.
+; GCN-LABEL: {{^}}scalar_to_vector_v4f16:
+; VI: v_lshlrev_b16_e32
+; VI: v_lshlrev_b16_e32
+; VI: v_or_b32_e32
+; VI: v_lshlrev_b32
+; VI: v_or_b32_sdwa
+; VI: v_or_b32_sdwa
+define amdgpu_kernel void @scalar_to_vector_v4f16() {
+bb:
+ ; Load a 16-bit float from an undef addrspace(1) pointer.
+ %load = load half, half addrspace(1)* undef, align 1
+ ; Reinterpret the 16 bits of the half as two bytes.
+ %tmp = bitcast half %load to <2 x i8>
+ ; Widen to 8 lanes. Mask indices 0 and 1 both refer to %tmp, so the
+ ; zeroinitializer operand is never selected.
+ %tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ; Keep lane 0 of %tmp1. Index 9 refers to element 1 of the undef operand,
+ ; so lanes 1-7 of %tmp2 are undefined.
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
+ ; Store the 8-byte vector to an undef addrspace(1) pointer.
+ store <8 x i8> %tmp2, <8 x i8> addrspace(1)* undef, align 8
+ ret void
+}
+
; Getting a SCALAR_TO_VECTOR seems to be tricky. These cases managed
; to produce one, but for some reason never made it to selection.
OpenPOWER on IntegriCloud