[ARM] Stop using scalar FP instructions in integer-only MVE mode.

If you compile with `-mattr=+mve` (enabling integer MVE instructions but not floating-point ones), then the scalar FP //registers// exist and it's legal to move things in and out of them, load and store them, but it's not legal to do arithmetic on them. In D60708, the calls to `addRegisterClass` in ARMISelLowering that enable use of the scalar FP registers became conditionalised on `Subtarget->hasFPRegs()` instead of `Subtarget->hasVFP2Base()`, so that loads, stores and moves of those registers would work. But I didn't realise that that would also enable all the operations on those types by default. Now, if the target doesn't have basic VFP, we follow up those `addRegisterClass` calls by turning back off all the nontrivial operations you can perform on f32 and f64. That causes several knock-on failures, which are fixed by allowing the `VMOVDcc` and `VMOVScc` instructions to be selected even if all you have is `HasFPRegs`, and adjusting several checks for 'is this a double in a single-precision-only world?' to the more general 'is this any FP type we can't do arithmetic on?'. Between those, the whole of the `float-ops.ll` and `fp16-instructions.ll` tests can now run in MVE-without-FP mode and generate correct-looking code. One odd side effect is that I had to relax the check lines in that test so that they permit test functions like `add_f` to be generated as tailcalls to software FP library functions, instead of ordinary calls. Doing that is entirely legal, but the mystery is why this is the first RUN line that's needed the relaxation: on the usual kind of non-FP target, no tailcalls ever seem to be generated. Going by the llc messages, I think `SoftenFloatResult` must be perturbing the code generation in some way, but that's as much as I can guess. Reviewers: dmgreen, ostannard Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63938 llvm-svn: 364909
author: Simon Tatham <simon.tatham@arm.com> 2019-07-02 11:26:00 +0000
committer: Simon Tatham <simon.tatham@arm.com> 2019-07-02 11:26:00 +0000
commit: 7b63a9533c7eba6e1402eebe6e03a54036df48cf (patch)
tree: 4ea8390a90c158037ada7f3b5d48cf7ce597c38f /llvm/lib
parent: c0b0f35788b54b9cf02087dcc9dcc7de4aedba7c (diff)
download: bcm5719-llvm-7b63a9533c7eba6e1402eebe6e03a54036df48cf.tar.gz
bcm5719-llvm-7b63a9533c7eba6e1402eebe6e03a54036df48cf.zip
3 files changed, 35 insertions, 18 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 632ee004c9f..36c783c2e65 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -224,6 +224,13 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
 void ARMTargetLowering::setAllExpand(MVT VT) {
   for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
     setOperationAction(Opc, VT, Expand);
+
+  // We support these really simple operations even on types where all
+  // the actual arithmetic has to be broken down into simpler
+  // operations or turned into library calls.
+  setOperationAction(ISD::BITCAST, VT, Legal);
+  setOperationAction(ISD::LOAD, VT, Legal);
+  setOperationAction(ISD::STORE, VT, Legal);
 }
 
 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
@@ -262,9 +269,6 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
-    setOperationAction(ISD::BITCAST, VT, Legal);
-    setOperationAction(ISD::LOAD, VT, Legal);
-    setOperationAction(ISD::STORE, VT, Legal);
 
     if (HasMVEFP) {
       // No native support for these.
@@ -289,9 +293,6 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
   for (auto VT : LongTypes) {
     addRegisterClass(VT, &ARM::QPRRegClass);
     setAllExpand(VT);
-    setOperationAction(ISD::BITCAST, VT, Legal);
-    setOperationAction(ISD::LOAD, VT, Legal);
-    setOperationAction(ISD::STORE, VT, Legal);
   }
 
   // It is legal to extload from v4i8 to v4i16 or v4i32.
@@ -594,10 +595,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   else
     addRegisterClass(MVT::i32, &ARM::GPRRegClass);
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
-      !Subtarget->isThumb1Only()) {
+  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
+      Subtarget->hasFPRegs()) {
     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
     addRegisterClass(MVT::f64, &ARM::DPRRegClass);
+    if (!Subtarget->hasVFP2Base())
+      setAllExpand(MVT::f32);
+    if (!Subtarget->hasFP64())
+      setAllExpand(MVT::f64);
   }
 
   if (Subtarget->hasFullFP16()) {
@@ -4544,6 +4549,16 @@ static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
   return false;
 }
 
+bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
+  if (VT == MVT::f32)
+    return !Subtarget->hasVFP2Base();
+  if (VT == MVT::f64)
+    return !Subtarget->hasFP64();
+  if (VT == MVT::f16)
+    return !Subtarget->hasFullFP16();
+  return false;
+}
+
 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   SDLoc dl(Op);
@@ -4587,9 +4602,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue TrueVal = Op.getOperand(2);
   SDValue FalseVal = Op.getOperand(3);
 
-  if (!Subtarget->hasFP64() && LHS.getValueType() == MVT::f64) {
-    DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
-                                                    dl);
+  if (isUnsupportedFloatingType(LHS.getValueType())) {
+    DAG.getTargetLoweringInfo().softenSetCCOperands(
+        DAG, LHS.getValueType(), LHS, RHS, CC, dl);
 
     // If softenSetCCOperands only returned one value, we should compare it to
     // zero.
@@ -4828,9 +4843,9 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue Dest = Op.getOperand(4);
   SDLoc dl(Op);
 
-  if (!Subtarget->hasFP64() && LHS.getValueType() == MVT::f64) {
-    DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
-                                                    dl);
+  if (isUnsupportedFloatingType(LHS.getValueType())) {
+    DAG.getTargetLoweringInfo().softenSetCCOperands(
+        DAG, LHS.getValueType(), LHS, RHS, CC, dl);
 
     // If softenSetCCOperands only returned one value, we should compare it to
     // zero.
@@ -4975,7 +4990,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   if (VT.isVector())
     return LowerVectorFP_TO_INT(Op, DAG);
-  if (!Subtarget->hasFP64() && Op.getOperand(0).getValueType() == MVT::f64) {
+  if (isUnsupportedFloatingType(Op.getOperand(0).getValueType())) {
     RTLIB::Libcall LC;
     if (Op.getOpcode() == ISD::FP_TO_SINT)
       LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
@@ -5039,7 +5054,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   if (VT.isVector())
     return LowerVectorINT_TO_FP(Op, DAG);
-  if (!Subtarget->hasFP64() && Op.getValueType() == MVT::f64) {
+  if (isUnsupportedFloatingType(VT)) {
     RTLIB::Libcall LC;
     if (Op.getOpcode() == ISD::SINT_TO_FP)
       LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index e79144d91b7..ca8b042c56b 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -794,6 +794,8 @@ class VectorType;
 
     bool shouldConsiderGEPOffsetSplit() const override { return true; }
 
+    bool isUnsupportedFloatingType(EVT VT) const;
+
     SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
                     SDValue ARMcc, SDValue CCR, SDValue Cmp,
                     SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index f6406283944..ea31e631d3a 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -2269,13 +2269,13 @@ def VMOVDcc  : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p),
                     IIC_fpUNA64,
                     [(set (f64 DPR:$Dd),
                           (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>,
-               RegConstraint<"$Dn = $Dd">, Requires<[HasVFP2,HasDPVFP]>;
+               RegConstraint<"$Dn = $Dd">, Requires<[HasFPRegs64]>;
 
 def VMOVScc  : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
                     IIC_fpUNA32,
                     [(set (f32 SPR:$Sd),
                           (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
-               RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>;
+               RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>;
 } // hasSideEffects
 
 //===----------------------------------------------------------------------===//
author	Simon Tatham <simon.tatham@arm.com>	2019-07-02 11:26:00 +0000
committer	Simon Tatham <simon.tatham@arm.com>	2019-07-02 11:26:00 +0000
commit	7b63a9533c7eba6e1402eebe6e03a54036df48cf (patch)
tree	4ea8390a90c158037ada7f3b5d48cf7ce597c38f /llvm/lib
parent	c0b0f35788b54b9cf02087dcc9dcc7de4aedba7c (diff)
download	bcm5719-llvm-7b63a9533c7eba6e1402eebe6e03a54036df48cf.tar.gz bcm5719-llvm-7b63a9533c7eba6e1402eebe6e03a54036df48cf.zip