diff options
| author | Sjoerd Meijer <sjoerd.meijer@arm.com> | 2018-08-08 13:11:31 +0000 |
|---|---|---|
| committer | Sjoerd Meijer <sjoerd.meijer@arm.com> | 2018-08-08 13:11:31 +0000 |
| commit | db5908deb964b81234a59ab477b460275552c038 (patch) | |
| tree | adac38e595cb60daba7f71ac92bc6995baeb88ba /llvm/test | |
| parent | 0735cfbd84b29a8348158fe7ae7d1e7375092c94 (diff) | |
| download | bcm5719-llvm-db5908deb964b81234a59ab477b460275552c038.tar.gz bcm5719-llvm-db5908deb964b81234a59ab477b460275552c038.zip | |
[ARM] FP16: vector vmov and vdup support
This adds codegen support for the vmov_n_f16 and vdup_n_f16 variants.
Differential Revision: https://reviews.llvm.org/D50329
llvm-svn: 339238
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll | 124 |
1 files changed, 72 insertions, 52 deletions
diff --git a/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll b/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll index c8cf71aeaf3..6afad0c9324 100644 --- a/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll +++ b/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll @@ -1120,58 +1120,78 @@ entry: ; %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vtrn1.i, 0, 1 ; ret %struct.float16x8x2_t %.fca.0.1.insert ;} -; -;define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) { -;entry: -; %0 = bitcast float %a.coerce to i32 -; %tmp.0.extract.trunc = trunc i32 %0 to i16 -; %1 = bitcast i16 %tmp.0.extract.trunc to half -; %vecinit = insertelement <4 x half> undef, half %1, i32 0 -; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer -; ret <4 x half> %vecinit4 -;} -; -;define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) { -;entry: -; %0 = bitcast float %a.coerce to i32 -; %tmp.0.extract.trunc = trunc i32 %0 to i16 -; %1 = bitcast i16 %tmp.0.extract.trunc to half -; %vecinit = insertelement <8 x half> undef, half %1, i32 0 -; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer -; ret <8 x half> %vecinit8 -;} -; -;define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) { -;entry: -; %0 = bitcast float %a.coerce to i32 -; %tmp.0.extract.trunc = trunc i32 %0 to i16 -; %1 = bitcast i16 %tmp.0.extract.trunc to half -; %vecinit = insertelement <4 x half> undef, half %1, i32 0 -; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer -; ret <4 x half> %vecinit4 -;} -; -;define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) { -;entry: -; %0 = bitcast float %a.coerce to i32 -; %tmp.0.extract.trunc = trunc i32 %0 to i16 -; %1 = bitcast i16 %tmp.0.extract.trunc to half -; %vecinit = insertelement <8 x half> undef, half %1, i32 0 -; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x 
half> undef, <8 x i32> zeroinitializer -; ret <8 x half> %vecinit8 -;} -; -;define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) { -;entry: -; %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> -; ret <4 x half> %shuffle -;} -; -;define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) { -;entry: -; %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -; ret <8 x half> %shuffle -;} + +define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) { +; CHECK-LABEL: test_vmov_n_f16: +; CHECK: vdup.16 d0, d0[0] +; CHECK-NEXT: bx lr +entry: + %0 = bitcast float %a.coerce to i32 + %tmp.0.extract.trunc = trunc i32 %0 to i16 + %1 = bitcast i16 %tmp.0.extract.trunc to half + %vecinit = insertelement <4 x half> undef, half %1, i32 0 + %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer + ret <4 x half> %vecinit4 +} + +define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) { +; CHECK-LABEL: test_vmovq_n_f16: +; CHECK: vdup.16 q0, d0[0] +; CHECK-NEXT: bx lr +entry: + %0 = bitcast float %a.coerce to i32 + %tmp.0.extract.trunc = trunc i32 %0 to i16 + %1 = bitcast i16 %tmp.0.extract.trunc to half + %vecinit = insertelement <8 x half> undef, half %1, i32 0 + %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer + ret <8 x half> %vecinit8 +} + +define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) { +; CHECK-LABEL: test_vdup_n_f16: +; CHECK: vdup.16 d0, d0[0] +; CHECK-NEXT: bx lr +entry: + %0 = bitcast float %a.coerce to i32 + %tmp.0.extract.trunc = trunc i32 %0 to i16 + %1 = bitcast i16 %tmp.0.extract.trunc to half + %vecinit = insertelement <4 x half> undef, half %1, i32 0 + %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer + ret <4 x half> %vecinit4 +} + +define dso_local <8 x half> @test_vdupq_n_f16(float 
%a.coerce) { +; CHECK-LABEL: test_vdupq_n_f16: +; CHECK: vdup.16 q0, d0[0] +; CHECK-NEXT: bx lr +entry: + %0 = bitcast float %a.coerce to i32 + %tmp.0.extract.trunc = trunc i32 %0 to i16 + %1 = bitcast i16 %tmp.0.extract.trunc to half + %vecinit = insertelement <8 x half> undef, half %1, i32 0 + %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer + ret <8 x half> %vecinit8 +} + +define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) { +; CHECK-LABEL: test_vdup_lane_f16: +; CHECK: vdup.16 d0, d0[3] +; CHECK-NEXT: bx lr +entry: + %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + ret <4 x half> %shuffle +} + +define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) { +; CHECK-LABEL: test_vdupq_lane_f16: +; CHECK: vdup.16 q0, d0[3] +; CHECK-NEXT: bx lr +entry: + %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> + ret <8 x half> %shuffle +} + +; FIXME (PR38404) ; ;define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) { ;entry: |

