diff options
| author | Diogo N. Sampaio <diogo.sampaio@arm.com> | 2019-04-23 09:36:39 +0000 | 
|---|---|---|
| committer | Diogo N. Sampaio <diogo.sampaio@arm.com> | 2019-04-23 09:36:39 +0000 | 
| commit | 2619f399f99573609be11c608f5f20f1dab595f0 (patch) | |
| tree | 57ce478e96dcb52edd7eb6c893e86c9801a2e4a1 /llvm | |
| parent | 545f621a7c76d0a2fc9b2f657c38da8252d2f9cc (diff) | |
| download | bcm5719-llvm-2619f399f99573609be11c608f5f20f1dab595f0.tar.gz bcm5719-llvm-2619f399f99573609be11c608f5f20f1dab595f0.zip  | |
[ARM][FIX] Add missing f16.lane.vldN/vstN lowering
Summary:
Add the missing D-register (v4f16) and Q-register (v8f16) cases to the
VLDSTLane lowering, so that lane loads and stores of fp16 elements are handled.
Reviewers: efriedma, kosarev, SjoerdMeijer, ostannard
Reviewed By: efriedma
Subscribers: javed.absar, kristof.beyls, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60874
llvm-svn: 358962
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll | 112 | 
2 files changed, 114 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index b9c4317c9cf..cb66d16a194 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2092,10 +2092,12 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,    default: llvm_unreachable("unhandled vld/vst lane type");      // Double-register operations:    case MVT::v8i8:  OpcodeIndex = 0; break; +  case MVT::v4f16:    case MVT::v4i16: OpcodeIndex = 1; break;    case MVT::v2f32:    case MVT::v2i32: OpcodeIndex = 2; break;      // Quad-register operations: +  case MVT::v8f16:    case MVT::v8i16: OpcodeIndex = 0; break;    case MVT::v4f32:    case MVT::v4i32: OpcodeIndex = 1; break; diff --git a/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll b/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll index 442ae5dfbbb..789a6d6aaa3 100644 --- a/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll +++ b/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll @@ -1319,3 +1319,115 @@ declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)  declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)  declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)  declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +declare { <8 x half>, <8 x half> } @llvm.arm.neon.vld2lane.v8f16.p0i8(i8*, <8 x half>, <8 x half>, i32, i32) +declare { <4 x half>, <4 x half> } @llvm.arm.neon.vld2lane.v4f16.p0i8(i8*, <4 x half>, <4 x half>, i32, i32) +declare { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3lane.v8f16.p0i8(i8*, <8 x half>, <8 x half>, <8 x half>, i32, i32) +declare { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3lane.v4f16.p0i8(i8*, <4 x half>, <4 x half>, <4 x half>, i32, i32) +declare { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4lane.v8f16.p0i8(i8*, <8 x half>, <8 x half>, <8 x half>, <8 x 
half>, i32, i32) +declare { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4lane.v4f16.p0i8(i8*, <4 x half>, <4 x half>, <4 x half>, <4 x half>, i32, i32) +declare void @llvm.arm.neon.vst2lane.p0i8.v8f16(i8*, <8 x half>, <8 x half>, i32, i32) +declare void @llvm.arm.neon.vst2lane.p0i8.v4f16(i8*, <4 x half>, <4 x half>, i32, i32) +declare void @llvm.arm.neon.vst3lane.p0i8.v8f16(i8*, <8 x half>, <8 x half>, <8 x half>, i32, i32) +declare void @llvm.arm.neon.vst3lane.p0i8.v4f16(i8*, <4 x half>, <4 x half>, <4 x half>, i32, i32) +declare void @llvm.arm.neon.vst4lane.p0i8.v8f16(i8*, <8 x half>, <8 x half>, <8 x half>, <8 x half>, i32, i32) +declare void @llvm.arm.neon.vst4lane.p0i8.v4f16(i8*, <4 x half>, <4 x half>, <4 x half>, <4 x half>, i32, i32) + +define { <8 x half>, <8 x half> } @test_vld2q_lane_f16(i8*, <8 x half>, <8 x half>) { +; CHECK-LABEL: test_vld2q_lane_f16: +; CHECK:    vld2.16 {d1[3], d3[3]}, [r0] +; CHECK-NEXT:    bx lr +entry: +  %3 = tail call { <8 x half>, <8 x half> } @llvm.arm.neon.vld2lane.v8f16.p0i8(i8* %0, <8 x half> %1, <8 x half> %2, i32 7, i32 2) +  ret { <8 x half>, <8 x half> } %3 +} + +define { <4 x half>, <4 x half> } @test_vld2_lane_f16(i8*, <4 x half>, <4 x half>) { +; CHECK-LABEL: test_vld2_lane_f16: +; CHECK:       vld2.16 {d0[3], d1[3]}, [r0] +; CHECK-NEXT:  bx lr +entry: +  %3 = tail call { <4 x half>, <4 x half> } @llvm.arm.neon.vld2lane.v4f16.p0i8(i8* %0, <4 x half> %1, <4 x half> %2, i32 3, i32 2) +  ret { <4 x half>, <4 x half> } %3 +} + +define { <8 x half>, <8 x half>, <8 x half> } @test_vld3q_lane_f16(i8*, <8 x half>, <8 x half>, <8 x half>) { +; CHECK-LABEL: test_vld3q_lane_f16: +; CHECK:       vld3.16 {d1[3], d3[3], d5[3]}, [r0] +; CHECK-NEXT:  bx lr +entry: +  %4 = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3lane.v8f16.p0i8(i8* %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, i32 7, i32 2) +  ret { <8 x half>, <8 x half>, <8 x half> } %4 +} + +define { <4 x half>, <4 x half>, <4 
x half> } @test_vld3_lane_f16(i8*, <4 x half>, <4 x half>, <4 x half>) { +; CHECK-LABEL: test_vld3_lane_f16: +; CHECK:       vld3.16 {d0[3], d1[3], d2[3]}, [r0] +; CHECK-NEXT:  bx lr +entry: +  %4 = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3lane.v4f16.p0i8(i8* %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 3, i32 2) +  ret { <4 x half>, <4 x half>, <4 x half> } %4 +} +define { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @test_vld4lane_v8f16_p0i8(i8*, <8 x half>, <8 x half>, <8 x half>, <8 x half>) { +; CHECK-LABEL: test_vld4lane_v8f16_p0i8: +; CHECK:       vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0] +; CHECK-NEXT:  bx lr +entry: +  %5 = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4lane.v8f16.p0i8(i8* %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, <8 x half> %4, i32 7, i32 2) +  ret { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %5 +} +define { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @test_vld4lane_v4f16_p0i8(i8*, <4 x half>, <4 x half>, <4 x half>, <4 x half>) { +; CHECK-LABEL: test_vld4lane_v4f16_p0i8: +; CHECK:       vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0] +; CHECK-NEXT:  bx lr +entry: + %5 = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4lane.v4f16.p0i8(i8* %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, <4 x half> %4, i32 3, i32 2) + ret { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %5 +} +define void @test_vst2lane_p0i8_v8f16(i8*, <8 x half>, <8 x half>) { +; CHECK-LABEL: test_vst2lane_p0i8_v8f16: +; CHECK:       vst2.16 {d0[0], d2[0]}, [r0] +; CHECK-NEXT:  bx lr +entry: +  tail call void @llvm.arm.neon.vst2lane.p0i8.v8f16(i8* %0, <8 x half> %1, <8 x half> %2, i32 0, i32 1) +  ret void +} +define void @test_vst2lane_p0i8_v4f16(i8*, <4 x half>, <4 x half>) { +; CHECK-LABEL: test_vst2lane_p0i8_v4f16: +; CHECK:       vst2.16 {d0[0], d1[0]}, [r0:32] +; CHECK-NEXT:  bx lr +entry: +  tail call void @llvm.arm.neon.vst2lane.p0i8.v4f16(i8* %0, 
<4 x half> %1, <4 x half> %2, i32 0, i32 0) +  ret void +} +define void @test_vst3lane_p0i8_v8f16(i8*, <8 x half>, <8 x half>, <8 x half>) { +; CHECK-LABEL: test_vst3lane_p0i8_v8f16: +; CHECK:       vst3.16 {d0[0], d2[0], d4[0]}, [r0] +; CHECK-NEXT:  bx lr +entry: +  tail call void @llvm.arm.neon.vst3lane.p0i8.v8f16(i8* %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, i32 0, i32 0) +  ret void +} +define void @test_vst3lane_p0i8_v4f16(i8*, <4 x half>, <4 x half>, <4 x half>) { +; CHECK-LABEL: test_vst3lane_p0i8_v4f16: +; CHECK:       vst3.16 {d0[0], d1[0], d2[0]}, [r0] +; CHECK-NEXT:  bx lr +entry: +  tail call void @llvm.arm.neon.vst3lane.p0i8.v4f16(i8* %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 0, i32 0) +  ret void +} +define void @test_vst4lane_p0i8_v8f16(i8*, <8 x half>, <8 x half>, <8 x half>, <8 x half>) { +; CHECK-LABEL: test_vst4lane_p0i8_v8f16: +; CHECK:       vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0:64] +; CHECK-NEXT:  bx lr +entry: +  tail call void @llvm.arm.neon.vst4lane.p0i8.v8f16(i8* %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, <8 x half> %4, i32 0, i32 0) +  ret void +} +define void @test_vst4lane_p0i8_v4f16(i8*, <4 x half>, <4 x half>, <4 x half>, <4 x half>) { +; CHECK-LABEL: test_vst4lane_p0i8_v4f16: +; CHECK:       vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0:64] +; CHECK-NEXT:  bx lr +entry: +  tail call void @llvm.arm.neon.vst4lane.p0i8.v4f16(i8* %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, <4 x half> %4, i32 0, i32 0) +  ret void +}  | 

