summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp10
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp11
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp5
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td84
4 files changed, 21 insertions, 89 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index e3c034c8a15..649dd7a349f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -373,6 +373,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = N->getOperand(0);
+
+ // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's
+ // entirely possible for both f16 and f32 to be legal, so use the fully
+ // hard-float FP_EXTEND rather than FP16_TO_FP.
+ if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32)
+ Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
+
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
@@ -511,6 +518,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ if (N->getValueType(0) == MVT::f16)
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0));
+
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::TRUNC_F32,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 189b56d5a93..c07b5e6a736 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5155,13 +5155,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::convert_to_fp16:
- setValue(&I, DAG.getNode(ISD::FP_TO_FP16, sdl,
- MVT::i16, getValue(I.getArgOperand(0))));
+ setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
+ DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
+ getValue(I.getArgOperand(0)),
+ DAG.getTargetConstant(0, MVT::i32))));
return nullptr;
case Intrinsic::convert_from_fp16:
setValue(&I,
- DAG.getNode(ISD::FP16_TO_FP, sdl, TLI->getValueType(I.getType()),
- getValue(I.getArgOperand(0))));
+ DAG.getNode(ISD::FP_EXTEND, sdl, TLI->getValueType(I.getType()),
+ DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
+ getValue(I.getArgOperand(0)))));
return nullptr;
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 42cd4bf1fe1..e80ef7176c2 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -422,7 +422,10 @@ static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::f32) {
+ if (OpVT == MVT::f16) {
+ if (RetVT == MVT::f32)
+ return FPEXT_F16_F32;
+ } else if (OpVT == MVT::f32) {
if (RetVT == MVT::f64)
return FPEXT_F32_F64;
if (RetVT == MVT::f128)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 2708ee03313..0ba069e99a8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2244,90 +2244,6 @@ def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
defm FCVT : FPConversion<"fcvt">;
-def : Pat<(fp_to_f16 FPR32:$Rn),
- (i32 (COPY_TO_REGCLASS
- (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
- GPR32))>;
-
-def : Pat<(f32 (f16_to_fp i32:$Rn)),
- (FCVTSHr (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS i32:$Rn, FPR32)),
- hsub))>;
-
-// When converting from f16 coming directly from a load, make sure we
-// load into the FPR16 registers rather than going through the GPRs.
-// f16->f32
-def : Pat<(f32 (f16_to_fp (i32
- (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend16:$extend))))),
- (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
-def : Pat<(f32 (f16_to_fp (i32
- (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend16:$extend))))),
- (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
-def : Pat <(f32 (f16_to_fp (i32
- (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
- (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
-def : Pat <(f32 (f16_to_fp (i32
- (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
- (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
-
-// f16->f64
-def : Pat<(f64 (fextend (f32 (f16_to_fp (i32
- (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend16:$extend))))))),
- (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
-def : Pat<(f64 (fextend (f32 (f16_to_fp (i32
- (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend16:$extend))))))),
- (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
-def : Pat <(f64 (fextend (f32 (f16_to_fp (i32
- (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
- (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
-def : Pat <(f64 (fextend (f32 (f16_to_fp (i32
- (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
- (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
-
-// When converting to f16 going directly to a store, make sure we use the
-// appropriate direct conversion instructions and store via the FPR16
-// registers rather than going through the GPRs.
-let AddedComplexity = 10 in {
-// f32->f16
-def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
- (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend16:$extend)),
- (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend16:$extend)>;
-def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
- (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend16:$extend)),
- (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend16:$extend)>;
-def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
- (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
- (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
-def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
- (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
- (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>;
-// f64->f16
-def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
- (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend16:$extend)),
- (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm,
- ro_Wextend16:$extend)>;
-def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
- (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend16:$extend)),
- (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm,
- ro_Xextend16:$extend)>;
-def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
- (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
- (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
-def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
- (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
- (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>;
-}
-
-
//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//
OpenPOWER on IntegriCloud