summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorOliver Stannard <oliver.stannard@arm.com>2015-11-09 11:03:18 +0000
committerOliver Stannard <oliver.stannard@arm.com>2015-11-09 11:03:18 +0000
commit563585789c1bc44fecdf8ea6ae8dd2dcaeb69f28 (patch)
treebdeb99b3ef15dc9036e3f161527b8c4917bc9d68 /llvm
parent4c9c9375422b1b4c67f5c3e172bcfd1c84af7c94 (diff)
downloadbcm5719-llvm-563585789c1bc44fecdf8ea6ae8dd2dcaeb69f28.tar.gz
bcm5719-llvm-563585789c1bc44fecdf8ea6ae8dd2dcaeb69f28.zip
[CodeGen] Always promote f16 if not legal
We don't currently have any runtime library functions for operations on f16 values (other than conversions to and from f32 and f64), so we should always promote it to f32, even if that is not a legal type. In that case, the f32 values would be softened to f32 library calls. SoftenFloatRes_FP_EXTEND now needs to check the promoted operand's type, as it may be a no-op or require a different library call. getCopyFromParts and getCopyToParts now need to cope with a floating-point value stored in a larger integer part, as is the case for any target that needs to store an f16 value in a 32-bit integer register. Differential Revision: http://reviews.llvm.org/D12856 llvm-svn: 252459
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp9
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp14
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp20
-rw-r--r--llvm/test/CodeGen/ARM/fp16-promote.ll314
4 files changed, 180 insertions, 177 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 56a79612f60..97e88bf84a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -418,6 +418,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SoftenFloatResult(Op.getNode(), 0);
}
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) {
+ Op = GetPromotedFloat(Op);
+ // If the promotion did the FP_EXTEND to the destination type for us,
+ // there's nothing left to do here.
+ if (Op.getValueType() == N->getValueType(0)) {
+ return BitConvertToInteger(Op);
+ }
+ }
+
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat)
Op = GetSoftenedFloat(Op);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2f5e329c95b..60b06b4ab0e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -198,6 +198,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
if (PartEVT == ValueVT)
return Val;
+ if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
+ ValueVT.bitsLT(PartEVT)) {
+ // For an FP value in an integer part, we need to truncate to the right
+ // width first.
+ PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
+ }
+
if (PartEVT.isInteger() && ValueVT.isInteger()) {
if (ValueVT.bitsLT(PartEVT)) {
// For a truncate, see if we have any information to
@@ -384,6 +392,12 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
assert(NumParts == 1 && "Do not know what to promote to!");
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
+ if (ValueVT.isFloatingPoint()) {
+ // FP values need to be bitcast, then extended if they are being put
+ // into a larger container.
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ }
assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
ValueVT.isInteger() &&
"Unknown mismatch!");
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 24bdef3040e..69eec888745 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1277,20 +1277,14 @@ void TargetLoweringBase::computeRegisterProperties(
ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
}
+ // Decide how to handle f16. If the target does not have native f16 support,
+ // promote it to f32, because there are no f16 library calls (except for
+ // conversions).
if (!isTypeLegal(MVT::f16)) {
- // If the target has native f32 support, promote f16 operations to f32. If
- // f32 is not supported, generate soft float library calls.
- if (isTypeLegal(MVT::f32)) {
- NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
- RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
- TransformToType[MVT::f16] = MVT::f32;
- ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
- } else {
- NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
- RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
- TransformToType[MVT::f16] = MVT::i16;
- ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat);
- }
+ NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
+ RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
+ TransformToType[MVT::f16] = MVT::f32;
+ ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
}
// Loop over all of the vector value types to see which need transformations.
diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll
index 0352e5e2a79..2a2eb8d2b6b 100644
--- a/llvm/test/CodeGen/ARM/fp16-promote.ll
+++ b/llvm/test/CodeGen/ARM/fp16-promote.ll
@@ -1,18 +1,18 @@
-; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16 -check-prefix=CHECK-ALL
-; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=CHECK-LIBCALL -check-prefix=CHECK-ALL
+; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16 --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL
+; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=CHECK-LIBCALL --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL --check-prefix=CHECK-LIBCALL-VFP
+; RUN: llc -asm-verbose=false < %s -mattr=-vfp2 | FileCheck %s --check-prefix=CHECK-LIBCALL -check-prefix=CHECK-NOVFP -check-prefix=CHECK-ALL
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
target triple = "armv7---eabihf"
-; CHECK-FP16-LABEL: test_fadd:
+; CHECK-ALL-LABEL: test_fadd:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vadd.f32
-; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_fadd:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vadd.f32
+; CHECK-VFP: vadd.f32
+; CHECK-NOVFP: bl __aeabi_fadd
+; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fadd(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
@@ -22,15 +22,14 @@ define void @test_fadd(half* %p, half* %q) #0 {
ret void
}
-; CHECK-FP16-LABEL: test_fsub:
+; CHECK-ALL-LABEL: test_fsub:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vsub.f32
-; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_fsub:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vsub.f32
+; CHECK-VFP: vsub.f32
+; CHECK-NOVFP: bl __aeabi_fsub
+; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fsub(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
@@ -40,15 +39,14 @@ define void @test_fsub(half* %p, half* %q) #0 {
ret void
}
-; CHECK-FP16-LABEL: test_fmul:
+; CHECK-ALL-LABEL: test_fmul:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vmul.f32
-; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_fmul
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vmul.f32
+; CHECK-VFP: vmul.f32
+; CHECK-NOVFP: bl __aeabi_fmul
+; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fmul(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
@@ -58,15 +56,14 @@ define void @test_fmul(half* %p, half* %q) #0 {
ret void
}
-; CHECK-FP16-LABEL: test_fdiv:
+; CHECK-ALL-LABEL: test_fdiv:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vdiv.f32
-; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_fdiv
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vdiv.f32
+; CHECK-VFP: vdiv.f32
+; CHECK-NOVFP: bl __aeabi_fdiv
+; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fdiv(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
@@ -76,15 +73,13 @@ define void @test_fdiv(half* %p, half* %q) #0 {
ret void
}
-; CHECK-FP16-LABEL: test_frem:
+; CHECK-ALL-LABEL: test_frem:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: bl fmodf
-; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_frem
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl fmodf
+; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_frem(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
@@ -96,9 +91,8 @@ define void @test_frem(half* %p, half* %q) #0 {
; CHECK-ALL-LABEL: test_load_store:
; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: ldrh r0, [r0]
-; CHECK-ALL-NEXT: strh r0, [r1]
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}]
+; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}]
define void @test_load_store(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
store half %a, half* %q
@@ -125,9 +119,12 @@ define half @test_call(half %a, half %b) #0 {
; CHECK-ALL-NEXT: .fnstart
; CHECK-ALL-NEXT: .save {r11, lr}
; CHECK-ALL-NEXT: push {r11, lr}
-; CHECK-ALL-NEXT: vmov.f32 s2, s0
-; CHECK-ALL-NEXT: vmov.f32 s0, s1
-; CHECK-ALL-NEXT: vmov.f32 s1, s2
+; CHECK-VFP-NEXT: vmov.f32 s2, s0
+; CHECK-VFP-NEXT: vmov.f32 s0, s1
+; CHECK-VFP-NEXT: vmov.f32 s1, s2
+; CHECK-NOVFP-NEXT: mov r2, r0
+; CHECK-NOVFP-NEXT: mov r0, r1
+; CHECK-NOVFP-NEXT: mov r1, r2
; CHECK-ALL-NEXT: bl test_callee
; CHECK-ALL-NEXT: pop {r11, pc}
define half @test_call_flipped(half %a, half %b) #0 {
@@ -137,9 +134,12 @@ define half @test_call_flipped(half %a, half %b) #0 {
; CHECK-ALL-LABEL: test_tailcall_flipped:
; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: vmov.f32 s2, s0
-; CHECK-ALL-NEXT: vmov.f32 s0, s1
-; CHECK-ALL-NEXT: vmov.f32 s1, s2
+; CHECK-VFP-NEXT: vmov.f32 s2, s0
+; CHECK-VFP-NEXT: vmov.f32 s0, s1
+; CHECK-VFP-NEXT: vmov.f32 s1, s2
+; CHECK-NOVFP-NEXT: mov r2, r0
+; CHECK-NOVFP-NEXT: mov r0, r1
+; CHECK-NOVFP-NEXT: mov r1, r2
; CHECK-ALL-NEXT: b test_callee
define half @test_tailcall_flipped(half %a, half %b) #0 {
%r = tail call half @test_callee(half %b, half %a)
@@ -149,12 +149,10 @@ define half @test_tailcall_flipped(half %a, half %b) #0 {
; Optimizer picks %p or %q based on %c and only loads that value
; No conversion is needed
; CHECK-ALL-LABEL: test_select:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: cmp r2, #0
-; CHECK-ALL-NEXT: movne r1, r0
-; CHECK-ALL-NEXT: ldrh r1, [r1]
-; CHECK-ALL-NEXT: strh r1, [r0]
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL: cmp {{r[0-9]+}}, #0
+; CHECK-ALL: movne {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}]
+; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}]
define void @test_select(half* %p, half* %q, i1 zeroext %c) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -165,17 +163,15 @@ define void @test_select(half* %p, half* %q, i1 zeroext %c) #0 {
; Test only two variants of fcmp. These get translated to f32 vcmpe
; instructions anyway.
-; CHECK-FP16-LABEL: test_fcmp_une:
+; CHECK-ALL-LABEL: test_fcmp_une:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcmpe.f32
-; CHECK-FP16: vmrs APSR_nzcv, fpscr
-; CHECK-FP16: movwne
-; CHECK-LIBCALL-LABEL: test_fcmp_une:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vcmpe.f32
-; CHECK-LIBCALL: movwne
+; CHECK-VFP: vcmpe.f32
+; CHECK-NOVFP: bl __aeabi_fcmpeq
+; CHECK-FP16: vmrs APSR_nzcv, fpscr
+; CHECK-ALL: movw{{ne|eq}}
define i1 @test_fcmp_une(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -183,18 +179,15 @@ define i1 @test_fcmp_une(half* %p, half* %q) #0 {
ret i1 %r
}
-; CHECK-FP16-LABEL: test_fcmp_ueq:
+; CHECK-ALL-LABEL: test_fcmp_ueq:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcmpe.f32
-; CHECK-FP16: vmrs APSR_nzcv, fpscr
-; CHECK-FP16: movweq
-; CHECK-FP16: movwvs
-; CHECK-LIBCALL-LABEL: test_fcmp_ueq:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vcmpe.f32
-; CHECK-LIBCALL: movweq
+; CHECK-VFP: vcmpe.f32
+; CHECK-NOVFP: bl __aeabi_fcmpeq
+; CHECK-FP16: vmrs APSR_nzcv, fpscr
+; CHECK-LIBCALL: movw{{ne|eq}}
define i1 @test_fcmp_ueq(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -202,19 +195,18 @@ define i1 @test_fcmp_ueq(half* %p, half* %q) #0 {
ret i1 %r
}
-; CHECK-FP16-LABEL: test_br_cc:
+; CHECK-ALL-LABEL: test_br_cc:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcmpe.f32
-; CHECK-FP16: vmrs APSR_nzcv, fpscr
-; CHECK-FP16: strmi
-; CHECK-FP16: strpl
-; CHECK-LIBCALL-LABEL: test_br_cc:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vcmpe.f32
-; CHECK-LIBCALL: strmi
-; CHECK-LIBCALL: strpl
+; CHECK-VFP: vcmpe.f32
+; CHECK-NOVFP: bl __aeabi_fcmplt
+; CHECK-FP16: vmrs APSR_nzcv, fpscr
+; CHECK-VFP: strmi
+; CHECK-VFP: strpl
+; CHECK-NOVFP: strne
+; CHECK-NOVFP: streq
define void @test_br_cc(half* %p, half* %q, i32* %p1, i32* %p2) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -229,20 +221,19 @@ else:
}
declare i1 @test_dummy(half* %p) #0
-; CHECK-FP16-LABEL: test_phi:
+; CHECK-ALL-LABEL: test_phi:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: [[LOOP:.LBB[1-9_]+]]:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl test_dummy
; CHECK-FP16: bne [[LOOP]]
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_phi:
-; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL-VFP: bl __aeabi_h2f
; CHECK-LIBCALL: [[LOOP:.LBB[1-9_]+]]:
-; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL-VFP: bl __aeabi_h2f
; CHECK-LIBCALL: bl test_dummy
; CHECK-LIBCALL: bne [[LOOP]]
-; CHECK-LIBCALL: bl __aeabi_f2h
+; CHECK-LIBCALL-VFP: bl __aeabi_f2h
define void @test_phi(half* %p) #0 {
entry:
%a = load half, half* %p
@@ -257,59 +248,52 @@ return:
ret void
}
-; CHECK-FP16-LABEL: test_fptosi_i32:
+; CHECK-ALL-LABEL: test_fptosi_i32:
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcvt.s32.f32
-; CHECK-LIBCALL-LABEL: test_fptosi_i32:
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vcvt.s32.f32
+; CHECK-VFP: vcvt.s32.f32
+; CHECK-NOVFP: bl __aeabi_f2iz
define i32 @test_fptosi_i32(half* %p) #0 {
%a = load half, half* %p, align 2
%r = fptosi half %a to i32
ret i32 %r
}
-; CHECK-FP16-LABEL: test_fptosi_i64:
+; CHECK-ALL-LABEL: test_fptosi_i64:
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: bl __aeabi_f2lz
-; CHECK-LIBCALL-LABEL: test_fptosi_i64:
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: bl __aeabi_f2lz
+; CHECK-ALL: bl __aeabi_f2lz
define i64 @test_fptosi_i64(half* %p) #0 {
%a = load half, half* %p, align 2
%r = fptosi half %a to i64
ret i64 %r
}
-; CHECK-FP16-LABEL: test_fptoui_i32:
+; CHECK-ALL-LABEL: test_fptoui_i32:
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcvt.u32.f32
-; CHECK-LIBCALL-LABEL: test_fptoui_i32:
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vcvt.u32.f32
+; CHECK-VFP: vcvt.u32.f32
+; CHECK-NOVFP: bl __aeabi_f2uiz
define i32 @test_fptoui_i32(half* %p) #0 {
%a = load half, half* %p, align 2
%r = fptoui half %a to i32
ret i32 %r
}
-; CHECK-FP16-LABEL: test_fptoui_i64:
+; CHECK-ALL-LABEL: test_fptoui_i64:
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: bl __aeabi_f2ulz
-; CHECK-LIBCALL-LABEL: test_fptoui_i64:
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: bl __aeabi_f2ulz
+; CHECK-ALL: bl __aeabi_f2ulz
define i64 @test_fptoui_i64(half* %p) #0 {
%a = load half, half* %p, align 2
%r = fptoui half %a to i64
ret i64 %r
}
-; CHECK-FP16-LABEL: test_sitofp_i32:
-; CHECK-FP16: vcvt.f32.s32
+; CHECK-ALL-LABEL: test_sitofp_i32:
+; CHECK-VFP: vcvt.f32.s32
+; CHECK-NOVFP: bl __aeabi_i2f
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_sitofp_i32:
-; CHECK-LIBCALL: vcvt.f32.s32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sitofp_i32(i32 %a, half* %p) #0 {
%r = sitofp i32 %a to half
@@ -317,11 +301,10 @@ define void @test_sitofp_i32(i32 %a, half* %p) #0 {
ret void
}
-; CHECK-FP16-LABEL: test_uitofp_i32:
-; CHECK-FP16: vcvt.f32.u32
+; CHECK-ALL-LABEL: test_uitofp_i32:
+; CHECK-VFP: vcvt.f32.u32
+; CHECK-NOVFP: bl __aeabi_ui2f
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_uitofp_i32:
-; CHECK-LIBCALL: vcvt.f32.u32
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_uitofp_i32(i32 %a, half* %p) #0 {
%r = uitofp i32 %a to half
@@ -329,11 +312,9 @@ define void @test_uitofp_i32(i32 %a, half* %p) #0 {
ret void
}
-; CHECK-FP16-LABEL: test_sitofp_i64:
-; CHECK-FP16: bl __aeabi_l2f
+; CHECK-ALL-LABEL: test_sitofp_i64:
+; CHECK-ALL: bl __aeabi_l2f
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_sitofp_i64:
-; CHECK-LIBCALL: bl __aeabi_l2f
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sitofp_i64(i64 %a, half* %p) #0 {
%r = sitofp i64 %a to half
@@ -341,11 +322,9 @@ define void @test_sitofp_i64(i64 %a, half* %p) #0 {
ret void
}
-; CHECK-FP16-LABEL: test_uitofp_i64:
-; CHECK-FP16: bl __aeabi_ul2f
+; CHECK-ALL-LABEL: test_uitofp_i64:
+; CHECK-ALL: bl __aeabi_ul2f
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_uitofp_i64:
-; CHECK-LIBCALL: bl __aeabi_ul2f
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_uitofp_i64(i64 %a, half* %p) #0 {
%r = uitofp i64 %a to half
@@ -385,10 +364,10 @@ define float @test_fpextend_float(half* %p) {
; CHECK-FP16-LABEL: test_fpextend_double:
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcvt.f64.f32
; CHECK-LIBCALL-LABEL: test_fpextend_double:
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vcvt.f64.f32
+; CHECK-VFP: vcvt.f64.f32
+; CHECK-NOVFP: bl __aeabi_f2d
define double @test_fpextend_double(half* %p) {
%a = load half, half* %p, align 2
%r = fpext half %a to double
@@ -438,13 +417,13 @@ declare half @llvm.nearbyint.f16(half %a) #0
declare half @llvm.round.f16(half %a) #0
declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
-; CHECK-FP16-LABEL: test_sqrt:
+; CHECK-ALL-LABEL: test_sqrt:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vsqrt.f32
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_sqrt:
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vsqrt.f32
+; CHECK-VFP-LIBCALL: vsqrt.f32
+; CHECK-NOVFP: bl sqrtf
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sqrt(half* %p) #0 {
%a = load half, half* %p, align 2
@@ -671,7 +650,10 @@ define void @test_maxnum(half* %p, half* %q) #0 {
; CHECK-LIBCALL-LABEL: test_copysign:
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vbsl
+; CHECK-VFP-LIBCALL: vbsl
+; CHECK-NOVFP: bfc
+; CHECK-NOVFP: and
+; CHECK-NOVFP: orr
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_copysign(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
@@ -781,7 +763,8 @@ define void @test_round(half* %p) {
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: vmla.f32
+; CHECK-VFP-LIBCALL: vmla.f32
+; CHECK-NOVFP: bl __aeabi_fmul
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
%a = load half, half* %p, align 2
@@ -797,31 +780,28 @@ define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
; and extractelement have these extra loads and stores.
; CHECK-ALL-LABEL: test_insertelement:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: .pad #8
-; CHECK-ALL-NEXT: sub sp, sp, #8
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: mov
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: add
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: add sp, sp, #8
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL: sub sp, sp, #8
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: mov
+; CHECK-ALL-DAG: ldrh
+; CHECK-ALL-DAG: add
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: add sp, sp, #8
define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
%a = load half, half* %p, align 2
%b = load <4 x half>, <4 x half>* %q, align 8
@@ -831,23 +811,30 @@ define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
}
; CHECK-ALL-LABEL: test_extractelement:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: .pad #8
-; CHECK-ALL-NEXT: sub sp, sp, #8
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: orr
-; CHECK-ALL-NEXT: str
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: orr
-; CHECK-ALL-NEXT: str
-; CHECK-ALL-NEXT: mov
-; CHECK-ALL-NEXT: add
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: add sp, sp, #8
-; CHECK-ALL-NEXT: bx lr
+; CHECK-VFP: sub sp, sp, #8
+; CHECK-VFP: ldrh
+; CHECK-VFP: ldrh
+; CHECK-VFP: orr
+; CHECK-VFP: str
+; CHECK-VFP: ldrh
+; CHECK-VFP: ldrh
+; CHECK-VFP: orr
+; CHECK-VFP: str
+; CHECK-VFP: mov
+; CHECK-VFP: add
+; CHECK-VFP: ldrh
+; CHECK-VFP: strh
+; CHECK-VFP: add sp, sp, #8
+; CHECK-VFP: bx lr
+; CHECK-NOVFP: ldrh
+; CHECK-NOVFP: strh
+; CHECK-NOVFP: ldrh
+; CHECK-NOVFP: strh
+; CHECK-NOVFP: ldrh
+; CHECK-NOVFP: strh
+; CHECK-NOVFP: ldrh
+; CHECK-NOVFP: strh
+; CHECK-NOVFP: ldrh
define void @test_extractelement(half* %p, <4 x half>* %q, i32 %i) #0 {
%a = load <4 x half>, <4 x half>* %q, align 8
%b = extractelement <4 x half> %a, i32 %i
@@ -860,12 +847,10 @@ define void @test_extractelement(half* %p, <4 x half>* %q, i32 %i) #0 {
%struct.dummy = type { i32, half }
; CHECK-ALL-LABEL: test_insertvalue:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: ldr
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: str
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL-DAG: ldr
+; CHECK-ALL-DAG: ldrh
+; CHECK-ALL-DAG: strh
+; CHECK-ALL-DAG: str
define void @test_insertvalue(%struct.dummy* %p, half* %q) {
%a = load %struct.dummy, %struct.dummy* %p
%b = load half, half* %q
@@ -875,10 +860,9 @@ define void @test_insertvalue(%struct.dummy* %p, half* %q) {
}
; CHECK-ALL-LABEL: test_extractvalue:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL: .fnstart
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
define void @test_extractvalue(%struct.dummy* %p, half* %q) {
%a = load %struct.dummy, %struct.dummy* %p
%b = extractvalue %struct.dummy %a, 1
@@ -886,10 +870,11 @@ define void @test_extractvalue(%struct.dummy* %p, half* %q) {
ret void
}
-; CHECK-FP16-LABEL: test_struct_return:
+; CHECK-ALL-LABEL: test_struct_return:
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-LIBCALL-LABEL: test_struct_return:
-; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP-LIBCALL: bl __aeabi_h2f
+; CHECK-NOVFP-DAG: ldr
+; CHECK-NOVFP-DAG: ldrh
define %struct.dummy @test_struct_return(%struct.dummy* %p) {
%a = load %struct.dummy, %struct.dummy* %p
ret %struct.dummy %a
@@ -897,6 +882,7 @@ define %struct.dummy @test_struct_return(%struct.dummy* %p) {
; CHECK-ALL-LABEL: test_struct_arg:
; CHECK-ALL-NEXT: .fnstart
+; CHECK-NOVFP-NEXT: mov r0, r1
; CHECK-ALL-NEXT: bx lr
define half @test_struct_arg(%struct.dummy %p) {
%a = extractvalue %struct.dummy %p, 1
OpenPOWER on IntegriCloud