summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorOliver Stannard <oliver.stannard@arm.com>2014-08-21 12:50:31 +0000
committerOliver Stannard <oliver.stannard@arm.com>2014-08-21 12:50:31 +0000
commit51b1d460cb77a726546099f857de0492433bc321 (patch)
treee05a0172d880987a30d1737cc554f6e411fa5b38 /llvm/lib
parent18b2a258c33368fad430750ad18b2b70f23dcf35 (diff)
downloadbcm5719-llvm-51b1d460cb77a726546099f857de0492433bc321.tar.gz
bcm5719-llvm-51b1d460cb77a726546099f857de0492433bc321.zip
[ARM] Enable DP copy, load and store instructions for FPv4-SP
The FPv4-SP floating-point unit is generally referred to as single-precision only, but it does have double-precision registers and load, store and GPR<->DPR move instructions which operate on them. This patch enables the use of these registers, the main advantage of which is that we now comply with the AAPCS-VFP calling convention. This partially reverts r209650, which added some AAPCS-VFP support, but did not handle return values or alignment of double arguments in registers. This patch also adds tests for Thumb2 code generation for floating-point instructions and intrinsics, which previously only existed for ARM. llvm-svn: 216172
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp26
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp19
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp9
-rw-r--r--llvm/lib/Target/ARM/ARMCallingConv.h17
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp179
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.h7
-rw-r--r--llvm/lib/Target/ARM/ARMInstrVFP.td2
7 files changed, 206 insertions, 53 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index ea88264d95f..ed3f770afec 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3517,6 +3517,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
break;
+ case ISD::FADD:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128));
+ break;
+ case ISD::FMUL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
+ RTLIB::MUL_F80, RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128));
+ break;
case ISD::FP16_TO_FP: {
if (Node->getValueType(0) == MVT::f32) {
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
@@ -3549,12 +3559,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::FSUB: {
EVT VT = Node->getValueType(0);
- assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
- TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
- "Don't know how to expand this FP subtraction!");
- Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
- Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
- Results.push_back(Tmp1);
+ if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
+ Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Results.push_back(Tmp1);
+ } else {
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
+ RTLIB::SUB_F80, RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128));
+ }
break;
}
case ISD::SUB: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e264cd9ae4a..51af1ca7de0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7265,8 +7265,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
if (Args[i].isNest)
Flags.setNest();
- if (NeedsRegBlock)
+ if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
+ if (Value == NumValues - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
Flags.setOrigAlign(OriginalAlignment);
MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
@@ -7312,10 +7315,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
else if (j != 0)
MyFlags.Flags.setOrigAlign(1);
- // Only mark the end at the last register of the last value.
- if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1)
- MyFlags.Flags.setInConsecutiveRegsLast();
-
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
}
@@ -7530,8 +7529,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
Flags.setNest();
- if (NeedsRegBlock)
+ if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
+ if (Value == NumValues - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
Flags.setOrigAlign(OriginalAlignment);
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
@@ -7544,11 +7546,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// if it isn't first piece, alignment must be 1
else if (i > 0)
MyFlags.Flags.setOrigAlign(1);
-
- // Only mark the end at the last register of the last value.
- if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1)
- MyFlags.Flags.setInConsecutiveRegsLast();
-
Ins.push_back(MyFlags);
}
PartBase += VT.getStoreSize();
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 37290ad9746..6b660a4ad06 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -721,7 +721,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = ARM::VMOVRS;
else if (SPRDest && GPRSrc)
Opc = ARM::VMOVSR;
- else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
Opc = ARM::VORRq;
@@ -781,6 +781,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BeginIdx = ARM::dsub_0;
SubRegs = 4;
Spacing = 2;
+ } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
+ Opc = ARM::VMOVS;
+ BeginIdx = ARM::ssub_0;
+ SubRegs = 2;
}
assert(Opc && "Impossible reg-to-reg copy");
@@ -1231,7 +1235,8 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
- if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
+ if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() ||
+ Subtarget.isFPOnlySP())
return false;
// Look for a copy between even S-registers. That is where we keep floats
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.h b/llvm/lib/Target/ARM/ARMCallingConv.h
index 4cb7f7a3a24..bd07236d0ea 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -177,8 +177,9 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
+
// AAPCS HFAs must have 1-4 elements, all of the same type
- assert(PendingHAMembers.size() < 8);
+ assert(PendingHAMembers.size() < 4);
if (PendingHAMembers.size() > 0)
assert(PendingHAMembers[0].getLocVT() == LocVT);
@@ -188,7 +189,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
if (ArgFlags.isInConsecutiveRegsLast()) {
- assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 &&
+ assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
"Homogeneous aggregates must have between 1 and 4 members");
// Try to allocate a contiguous block of registers, each of the correct
@@ -196,7 +197,6 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
const uint16_t *RegList;
unsigned NumRegs;
switch (LocVT.SimpleTy) {
- case MVT::i32:
case MVT::f32:
RegList = SRegList;
NumRegs = 16;
@@ -235,20 +235,11 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State.AllocateReg(SRegList[regNo]);
unsigned Size = LocVT.getSizeInBits() / 8;
- unsigned Align = Size;
-
- if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) {
- // Vectors are always aligned to 8 bytes. If we've seen an i32 here
- // it's because it's been split from a larger type, also with align 8.
- Align = 8;
- }
+ unsigned Align = std::min(Size, 8U);
for (auto It : PendingHAMembers) {
It.convertToMem(State.AllocateStack(Size, Align));
State.addLoc(It);
-
- // Only the first member needs to be aligned.
- Align = 1;
}
// All pending members have now been allocated
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 24992c7d6f2..faf12fefffd 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -445,8 +445,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
- if (!Subtarget->isFPOnlySP())
- addRegisterClass(MVT::f64, &ARM::DPRRegClass);
+ addRegisterClass(MVT::f64, &ARM::DPRRegClass);
}
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -628,6 +627,39 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->isThumb1Only())
setTargetDAGCombine(ISD::ADDC);
+ if (Subtarget->isFPOnlySP()) {
+ // When targetting a floating-point unit with only single-precision
+ // operations, f64 is legal for the few double-precision instructions which
+ // are present However, no double-precision operations other than moves,
+ // loads and stores are provided by the hardware.
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FDIV, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
+ }
computeRegisterProperties();
@@ -3276,6 +3308,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue
ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
SDLoc dl) const {
+ assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
@@ -3391,9 +3424,8 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
EVT VT = Op.getValueType();
- return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse,
- ARMcc, CCR, OverflowCmp);
-
+ return getCMOV(SDLoc(Op), VT, SelectTrue, SelectFalse, ARMcc, CCR,
+ OverflowCmp, DAG);
}
// Convert:
@@ -3427,7 +3459,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CCR = Cond.getOperand(3);
SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
assert(True.getValueType() == VT);
- return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+ return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
}
}
}
@@ -3497,6 +3529,32 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
}
}
+SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal,
+ SDValue TrueVal, SDValue ARMcc, SDValue CCR,
+ SDValue Cmp, SelectionDAG &DAG) const {
+ if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
+ FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
+ TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
+
+ SDValue TrueLow = TrueVal.getValue(0);
+ SDValue TrueHigh = TrueVal.getValue(1);
+ SDValue FalseLow = FalseVal.getValue(0);
+ SDValue FalseHigh = FalseVal.getValue(1);
+
+ SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
+ ARMcc, CCR, Cmp);
+ SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
+ ARMcc, CCR, duplicateCmp(Cmp, DAG));
+
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
+ } else {
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
+ Cmp);
+ }
+}
+
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -3506,6 +3564,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue FalseVal = Op.getOperand(3);
SDLoc dl(Op);
+ if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
+ DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
+ dl);
+
+ // If softenSetCCOperands only returned one value, we should compare it to
+ // zero.
+ if (!RHS.getNode()) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
if (LHS.getValueType() == MVT::i32) {
// Try to generate VSEL on ARMv8.
// The VSEL instruction can't use all the usual ARM condition
@@ -3530,8 +3600,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
- Cmp);
+ return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
}
ARMCC::CondCodes CondCode, CondCode2;
@@ -3570,14 +3639,12 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
- ARMcc, CCR, Cmp);
+ SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
- Result = DAG.getNode(ARMISD::CMOV, dl, VT,
- Result, TrueVal, ARMcc2, CCR, Cmp2);
+ Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
}
return Result;
}
@@ -3710,6 +3777,18 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
+ if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
+ DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
+ dl);
+
+ // If softenSetCCOperands only returned one value, we should compare it to
+ // zero.
+ if (!RHS.getNode()) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
if (LHS.getValueType() == MVT::i32) {
SDValue ARMcc;
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
@@ -3802,11 +3881,23 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}
-static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
+ if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
+ RTLIB::Libcall LC;
+ if (Op.getOpcode() == ISD::FP_TO_SINT)
+ LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ else
+ LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
+ /*isSigned*/ false, SDLoc(Op)).first;
+ }
+
SDLoc dl(Op);
unsigned Opc;
@@ -3856,11 +3947,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(Opc, dl, VT, Op);
}
-static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
+ if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
+ RTLIB::Libcall LC;
+ if (Op.getOpcode() == ISD::SINT_TO_FP)
+ LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ else
+ LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
+ Op.getValueType());
+ return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
+ /*isSigned*/ false, SDLoc(Op)).first;
+ }
+
SDLoc dl(Op);
unsigned Opc;
@@ -4369,7 +4472,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
- if (Op.getOperand(1).getValueType().isFloatingPoint()) {
+ if (Op1.getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal FP comparison");
case ISD::SETUNE:
@@ -4633,6 +4736,11 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
bool IsDouble = Op.getValueType() == MVT::f64;
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+ // Use the default (constant pool) lowering for double constants when we have
+ // an SP-only FPU
+ if (IsDouble && Subtarget->isFPOnlySP())
+ return SDValue();
+
// Try splatting with a VMOV.f32...
APFloat FPVal = CFP->getValueAPF();
int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
@@ -6336,6 +6444,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
return LowerDYNAMIC_STACKALLOC(Op, DAG);
llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
}
}
@@ -8479,10 +8589,11 @@ static SDValue PerformBFICombine(SDNode *N,
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
// vmovrrd(vmovdrr x, y) -> x,y
SDValue InDouble = N->getOperand(0);
- if (InDouble.getOpcode() == ARMISD::VMOVDRR)
+ if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
// vmovrrd(load f64) -> (load i32), (load i32)
@@ -8695,7 +8806,8 @@ static bool hasNormalLoadOperand(SDNode *N) {
/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
/// ISD::BUILD_VECTOR.
static SDValue PerformBUILD_VECTORCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI){
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
// build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
// VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
// into a pair of GPRs, which is fine when the value is used as a scalar,
@@ -9710,10 +9822,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
- case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+ case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
case ISD::STORE: return PerformSTORECombine(N, DCI);
- case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
+ case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
@@ -10703,6 +10815,31 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
return DAG.getMergeValues(Ops, DL);
}
+SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
+ "Unexpected type for custom-lowering FP_EXTEND");
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ SDValue SrcVal = Op.getOperand(0);
+ return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
+ /*isSigned*/ false, SDLoc(Op)).first;
+}
+
+SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getOperand(0).getValueType() == MVT::f64 &&
+ Subtarget->isFPOnlySP() &&
+ "Unexpected type for custom-lowering FP_ROUND");
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ SDValue SrcVal = Op.getOperand(0);
+ return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
+ /*isSigned*/ false, SDLoc(Op)).first;
+}
+
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
@@ -10730,7 +10867,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return false;
if (VT == MVT::f32)
return ARM_AM::getFP32Imm(Imm) != -1;
- if (VT == MVT::f64)
+ if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
return ARM_AM::getFP64Imm(Imm) != -1;
return false;
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index eb6a7fb2896..3ad9f13d453 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -476,6 +476,10 @@ namespace llvm {
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
@@ -565,6 +569,9 @@ namespace llvm {
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ SDValue getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
+ SDValue ARMcc, SDValue CCR, SDValue Cmp,
+ SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS,
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 81fa636b38b..90223213a2e 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -515,6 +515,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
let Inst{5} = Sm{0};
let Inst{15-12} = Dd{3-0};
let Inst{22} = Dd{4};
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
// Special case encoding: bits 11-8 is 0b1011.
OpenPOWER on IntegriCloud