summary refs log tree commit diff stats
path: root/llvm/test
diff options
context:
space:
mode:
author Sjoerd Meijer <sjoerd.meijer@arm.com> 2018-08-08 13:11:31 +0000
committer Sjoerd Meijer <sjoerd.meijer@arm.com> 2018-08-08 13:11:31 +0000
commit db5908deb964b81234a59ab477b460275552c038 (patch)
tree adac38e595cb60daba7f71ac92bc6995baeb88ba /llvm/test
parent 0735cfbd84b29a8348158fe7ae7d1e7375092c94 (diff)
download bcm5719-llvm-db5908deb964b81234a59ab477b460275552c038.tar.gz
bcm5719-llvm-db5908deb964b81234a59ab477b460275552c038.zip
[ARM] FP16: vector vmov and vdup support
This adds codegen support for the vmov_n_f16 and vdup_n_f16 variants.

Differential Revision: https://reviews.llvm.org/D50329
llvm-svn: 339238
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll 124
1 files changed, 72 insertions, 52 deletions
diff --git a/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll b/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
index c8cf71aeaf3..6afad0c9324 100644
--- a/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
@@ -1120,58 +1120,78 @@ entry:
; %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vtrn1.i, 0, 1
; ret %struct.float16x8x2_t %.fca.0.1.insert
;}
-;
-;define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) {
-;entry:
-; %0 = bitcast float %a.coerce to i32
-; %tmp.0.extract.trunc = trunc i32 %0 to i16
-; %1 = bitcast i16 %tmp.0.extract.trunc to half
-; %vecinit = insertelement <4 x half> undef, half %1, i32 0
-; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
-; ret <4 x half> %vecinit4
-;}
-;
-;define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) {
-;entry:
-; %0 = bitcast float %a.coerce to i32
-; %tmp.0.extract.trunc = trunc i32 %0 to i16
-; %1 = bitcast i16 %tmp.0.extract.trunc to half
-; %vecinit = insertelement <8 x half> undef, half %1, i32 0
-; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
-; ret <8 x half> %vecinit8
-;}
-;
-;define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) {
-;entry:
-; %0 = bitcast float %a.coerce to i32
-; %tmp.0.extract.trunc = trunc i32 %0 to i16
-; %1 = bitcast i16 %tmp.0.extract.trunc to half
-; %vecinit = insertelement <4 x half> undef, half %1, i32 0
-; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
-; ret <4 x half> %vecinit4
-;}
-;
-;define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) {
-;entry:
-; %0 = bitcast float %a.coerce to i32
-; %tmp.0.extract.trunc = trunc i32 %0 to i16
-; %1 = bitcast i16 %tmp.0.extract.trunc to half
-; %vecinit = insertelement <8 x half> undef, half %1, i32 0
-; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
-; ret <8 x half> %vecinit8
-;}
-;
-;define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) {
-;entry:
-; %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; ret <4 x half> %shuffle
-;}
-;
-;define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) {
-;entry:
-; %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; ret <8 x half> %shuffle
-;}
+
+define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) { ; Splats a half (received as the low 16 bits of a float argument) across a <4 x half>; the directives below expect one vdup.16 on d0 followed by the return.
+; CHECK-LABEL: test_vmov_n_f16:
+; CHECK: vdup.16 d0, d0[0]
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast float %a.coerce to i32 ; reinterpret the float argument's bits as an i32
+ %tmp.0.extract.trunc = trunc i32 %0 to i16 ; keep only the low 16 bits
+ %1 = bitcast i16 %tmp.0.extract.trunc to half ; view those 16 bits as a half value
+ %vecinit = insertelement <4 x half> undef, half %1, i32 0 ; put the half into lane 0 of an otherwise undef vector
+ %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer ; all-zero mask: every result lane takes lane 0 (splat)
+ ret <4 x half> %vecinit4
+}
+
+define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) { ; 8-lane variant: splats a half (low 16 bits of the float argument) across an <8 x half>; the directives below expect one vdup.16 into q0 followed by the return.
+; CHECK-LABEL: test_vmovq_n_f16:
+; CHECK: vdup.16 q0, d0[0]
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast float %a.coerce to i32 ; reinterpret the float argument's bits as an i32
+ %tmp.0.extract.trunc = trunc i32 %0 to i16 ; keep only the low 16 bits
+ %1 = bitcast i16 %tmp.0.extract.trunc to half ; view those 16 bits as a half value
+ %vecinit = insertelement <8 x half> undef, half %1, i32 0 ; put the half into lane 0 of an otherwise undef vector
+ %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer ; all-zero mask: every result lane takes lane 0 (splat)
+ ret <8 x half> %vecinit8
+}
+
+define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) { ; Same IR body and same expected output as test_vmov_n_f16: splat a half across a <4 x half>, expecting one vdup.16 on d0 followed by the return.
+; CHECK-LABEL: test_vdup_n_f16:
+; CHECK: vdup.16 d0, d0[0]
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast float %a.coerce to i32 ; reinterpret the float argument's bits as an i32
+ %tmp.0.extract.trunc = trunc i32 %0 to i16 ; keep only the low 16 bits
+ %1 = bitcast i16 %tmp.0.extract.trunc to half ; view those 16 bits as a half value
+ %vecinit = insertelement <4 x half> undef, half %1, i32 0 ; put the half into lane 0 of an otherwise undef vector
+ %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer ; all-zero mask: every result lane takes lane 0 (splat)
+ ret <4 x half> %vecinit4
+}
+
+define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) { ; Same IR body and same expected output as test_vmovq_n_f16: splat a half across an <8 x half>, expecting one vdup.16 into q0 followed by the return.
+; CHECK-LABEL: test_vdupq_n_f16:
+; CHECK: vdup.16 q0, d0[0]
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast float %a.coerce to i32 ; reinterpret the float argument's bits as an i32
+ %tmp.0.extract.trunc = trunc i32 %0 to i16 ; keep only the low 16 bits
+ %1 = bitcast i16 %tmp.0.extract.trunc to half ; view those 16 bits as a half value
+ %vecinit = insertelement <8 x half> undef, half %1, i32 0 ; put the half into lane 0 of an otherwise undef vector
+ %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer ; all-zero mask: every result lane takes lane 0 (splat)
+ ret <8 x half> %vecinit8
+}
+
+define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) { ; Broadcasts lane 3 of a <4 x half> to all four lanes; the directives below expect a single vdup on d0 followed by the return.
+; CHECK-LABEL: test_vdup_lane_f16:
+; CHECK: vdup.32 d0, d0[3]
+; CHECK-NEXT: bx lr
+entry: ; NOTE(review): the expectation above uses a .32 element size with lane index 3, while the elements are 16-bit halfs and test_vdupq_lane_f16 expects vdup.16 for the same lane -- confirm whether this records a backend pattern bug rather than the intended output.
+ %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> ; every mask index is 3: each result lane takes lane 3 of %a
+ ret <4 x half> %shuffle
+}
+
+define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) { ; Broadcasts lane 3 of a <4 x half> input to all eight lanes of an <8 x half> result; the directives below expect one vdup.16 into q0 followed by the return.
+; CHECK-LABEL: test_vdupq_lane_f16:
+; CHECK: vdup.16 q0, d0[3]
+; CHECK-NEXT: bx lr
+entry:
+ %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> ; eight-entry mask of 3s: widens to 8 lanes, each taking lane 3 of %a
+ ret <8 x half> %shuffle
+}
+
+; FIXME (PR38404)
;
;define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) {
;entry:
OpenPOWER on IntegriCloud