summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
author Sjoerd Meijer <sjoerd.meijer@arm.com> 2018-08-08 10:27:34 +0000
committer Sjoerd Meijer <sjoerd.meijer@arm.com> 2018-08-08 10:27:34 +0000
commit 920a4534854f52657ec1c638f5378d8846a3d4c3 (patch)
tree ae7e014ffd615cd2030f301cbb820f043f22d10f /llvm/test
parent 5477f11ba304939a26ede0f85cc10696b3cc1121 (diff)
download bcm5719-llvm-920a4534854f52657ec1c638f5378d8846a3d4c3.tar.gz
bcm5719-llvm-920a4534854f52657ec1c638f5378d8846a3d4c3.zip
[ARM] FP16: vector VMUL variants
This adds codegen support for the vmul_lane_f16 and vmul_n_f16 variants.

Differential Revision: https://reviews.llvm.org/D50326

llvm-svn: 339232
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll 78
1 files changed, 44 insertions, 34 deletions
diff --git a/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll b/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
index 496cabc7796..c8cf71aeaf3 100644
--- a/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
@@ -979,43 +979,53 @@ entry:
ret <8 x half> %0
}
-; FIXME (PR38404)
-;
-;define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) {
-;entry:
-; %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; %mul = fmul <4 x half> %shuffle, %a
-; ret <4 x half> %mul
-;}
+define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vmul_lane_f16:
+; CHECK: vmul.f16 d0, d0, d1[3]
+; CHECK-NEXT: bx lr
+entry:
+ %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %mul = fmul <4 x half> %shuffle, %a
+ ret <4 x half> %mul
+}
-;define dso_local <8 x half> @test_vmulq_lane_f16(<8 x half> %a, <4 x half> %b) {
-;entry:
-; %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; %mul = fmul <8 x half> %shuffle, %a
-; ret <8 x half> %mul
-;}
+define dso_local <8 x half> @test_vmulq_lane_f16(<8 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vmulq_lane_f16:
+; CHECK: vmul.f16 q0, q0, d2[3]
+; CHECK-NEXT: bx lr
+entry:
+ %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %mul = fmul <8 x half> %shuffle, %a
+ ret <8 x half> %mul
+}
-;define dso_local <4 x half> @test_vmul_n_f16(<4 x half> %a, float %b.coerce) {
-;entry:
-; %0 = bitcast float %b.coerce to i32
-; %tmp.0.extract.trunc = trunc i32 %0 to i16
-; %1 = bitcast i16 %tmp.0.extract.trunc to half
-; %vecinit = insertelement <4 x half> undef, half %1, i32 0
-; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
-; %mul = fmul <4 x half> %vecinit4, %a
-; ret <4 x half> %mul
-;}
+define dso_local <4 x half> @test_vmul_n_f16(<4 x half> %a, float %b.coerce) {
+; CHECK-LABEL: test_vmul_n_f16:
+; CHECK: vmul.f16 d0, d0, d1[0]
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast float %b.coerce to i32
+ %tmp.0.extract.trunc = trunc i32 %0 to i16
+ %1 = bitcast i16 %tmp.0.extract.trunc to half
+ %vecinit = insertelement <4 x half> undef, half %1, i32 0
+ %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
+ %mul = fmul <4 x half> %vecinit4, %a
+ ret <4 x half> %mul
+}
-;define dso_local <8 x half> @test_vmulq_n_f16(<8 x half> %a, float %b.coerce) {
-;entry:
-; %0 = bitcast float %b.coerce to i32
-; %tmp.0.extract.trunc = trunc i32 %0 to i16
-; %1 = bitcast i16 %tmp.0.extract.trunc to half
-; %vecinit = insertelement <8 x half> undef, half %1, i32 0
-; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
-; %mul = fmul <8 x half> %vecinit8, %a
-; ret <8 x half> %mul
-;}
+define dso_local <8 x half> @test_vmulq_n_f16(<8 x half> %a, float %b.coerce) {
+; CHECK-LABEL: test_vmulq_n_f16:
+; CHECK: vmul.f16 q0, q0, d2[0]
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast float %b.coerce to i32
+ %tmp.0.extract.trunc = trunc i32 %0 to i16
+ %1 = bitcast i16 %tmp.0.extract.trunc to half
+ %vecinit = insertelement <8 x half> undef, half %1, i32 0
+ %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
+ %mul = fmul <8 x half> %vecinit8, %a
+ ret <8 x half> %mul
+}
define dso_local <4 x half> @test_vbsl_f16(<4 x i16> %a, <4 x half> %b, <4 x half> %c) {
; CHECKLABEL: test_vbsl_f16:
OpenPOWER on IntegriCloud