[ARM] FP16: vector vmov and vdup support

This adds codegen support for the vmov_n_f16 and vdup_n_f16 variants. Differential Revision: https://reviews.llvm.org/D50329 llvm-svn: 339238
author: Sjoerd Meijer <sjoerd.meijer@arm.com> 2018-08-08 13:11:31 +0000
committer: Sjoerd Meijer <sjoerd.meijer@arm.com> 2018-08-08 13:11:31 +0000
commit: db5908deb964b81234a59ab477b460275552c038 (patch)
tree: adac38e595cb60daba7f71ac92bc6995baeb88ba /llvm/test
parent: 0735cfbd84b29a8348158fe7ae7d1e7375092c94 (diff)
download: bcm5719-llvm-db5908deb964b81234a59ab477b460275552c038.tar.gz
bcm5719-llvm-db5908deb964b81234a59ab477b460275552c038.zip
1 files changed, 72 insertions, 52 deletions
diff --git a/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll b/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
index c8cf71aeaf3..6afad0c9324 100644
--- a/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
@@ -1120,58 +1120,78 @@ entry:
 ;  %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vtrn1.i, 0, 1
 ;  ret %struct.float16x8x2_t %.fca.0.1.insert
 ;}
-;
-;define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) {
-;entry:
-;  %0 = bitcast float %a.coerce to i32
-;  %tmp.0.extract.trunc = trunc i32 %0 to i16
-;  %1 = bitcast i16 %tmp.0.extract.trunc to half
-;  %vecinit = insertelement <4 x half> undef, half %1, i32 0
-;  %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
-;  ret <4 x half> %vecinit4
-;}
-;
-;define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) {
-;entry:
-;  %0 = bitcast float %a.coerce to i32
-;  %tmp.0.extract.trunc = trunc i32 %0 to i16
-;  %1 = bitcast i16 %tmp.0.extract.trunc to half
-;  %vecinit = insertelement <8 x half> undef, half %1, i32 0
-;  %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
-;  ret <8 x half> %vecinit8
-;}
-;
-;define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) {
-;entry:
-;  %0 = bitcast float %a.coerce to i32
-;  %tmp.0.extract.trunc = trunc i32 %0 to i16
-;  %1 = bitcast i16 %tmp.0.extract.trunc to half
-;  %vecinit = insertelement <4 x half> undef, half %1, i32 0
-;  %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
-;  ret <4 x half> %vecinit4
-;}
-;
-;define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) {
-;entry:
-;  %0 = bitcast float %a.coerce to i32
-;  %tmp.0.extract.trunc = trunc i32 %0 to i16
-;  %1 = bitcast i16 %tmp.0.extract.trunc to half
-;  %vecinit = insertelement <8 x half> undef, half %1, i32 0
-;  %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
-;  ret <8 x half> %vecinit8
-;}
-;
-;define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) {
-;entry:
-;  %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-;  ret <4 x half> %shuffle
-;}
-;
-;define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) {
-;entry:
-;  %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-;  ret <8 x half> %shuffle
-;}
+
+define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) {
+; CHECK-LABEL: test_vmov_n_f16:
+; CHECK:         vdup.16 d0, d0[0]
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = bitcast float %a.coerce to i32
+  %tmp.0.extract.trunc = trunc i32 %0 to i16
+  %1 = bitcast i16 %tmp.0.extract.trunc to half
+  %vecinit = insertelement <4 x half> undef, half %1, i32 0
+  %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
+  ret <4 x half> %vecinit4
+}
+
+define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) {
+; CHECK-LABEL: test_vmovq_n_f16:
+; CHECK:         vdup.16 q0, d0[0]
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = bitcast float %a.coerce to i32
+  %tmp.0.extract.trunc = trunc i32 %0 to i16
+  %1 = bitcast i16 %tmp.0.extract.trunc to half
+  %vecinit = insertelement <8 x half> undef, half %1, i32 0
+  %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
+  ret <8 x half> %vecinit8
+}
+
+define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) {
+; CHECK-LABEL: test_vdup_n_f16:
+; CHECK:         vdup.16 d0, d0[0]
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = bitcast float %a.coerce to i32
+  %tmp.0.extract.trunc = trunc i32 %0 to i16
+  %1 = bitcast i16 %tmp.0.extract.trunc to half
+  %vecinit = insertelement <4 x half> undef, half %1, i32 0
+  %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
+  ret <4 x half> %vecinit4
+}
+
+define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) {
+; CHECK-LABEL: test_vdupq_n_f16:
+; CHECK:        vdup.16 q0, d0[0]
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = bitcast float %a.coerce to i32
+  %tmp.0.extract.trunc = trunc i32 %0 to i16
+  %1 = bitcast i16 %tmp.0.extract.trunc to half
+  %vecinit = insertelement <8 x half> undef, half %1, i32 0
+  %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
+  ret <8 x half> %vecinit8
+}
+
+define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vdup_lane_f16:
+; CHECK:         vdup.32 d0, d0[3]
+; CHECK-NEXT:    bx lr
+entry:
+  %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x half> %shuffle
+}
+
+define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vdupq_lane_f16:
+; CHECK:         vdup.16 q0, d0[3]
+; CHECK-NEXT:    bx lr
+entry:
+  %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  ret <8 x half> %shuffle
+}
+
+; FIXME (PR38404)
 ;
 ;define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) {
 ;entry:
author	Sjoerd Meijer <sjoerd.meijer@arm.com>	2018-08-08 13:11:31 +0000
committer	Sjoerd Meijer <sjoerd.meijer@arm.com>	2018-08-08 13:11:31 +0000
commit	db5908deb964b81234a59ab477b460275552c038 (patch)
tree	adac38e595cb60daba7f71ac92bc6995baeb88ba /llvm/test
parent	0735cfbd84b29a8348158fe7ae7d1e7375092c94 (diff)
download	bcm5719-llvm-db5908deb964b81234a59ab477b460275552c038.tar.gz bcm5719-llvm-db5908deb964b81234a59ab477b460275552c038.zip