Diffstat (limited to 'llvm/lib/Target/ARM')
-rw-r--r--  llvm/lib/Target/ARM/ARM.td                               |  86
-rw-r--r--  llvm/lib/Target/ARM/ARMAsmPrinter.cpp                    |   4
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp                 |   9
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp              |   4
-rw-r--r--  llvm/lib/Target/ARM/ARMFastISel.cpp                      |  40
-rw-r--r--  llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp                  |   6
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp                  |  70
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrInfo.td                      |  24
-rw-r--r--  llvm/lib/Target/ARM/ARMInstructionSelector.cpp           |  11
-rw-r--r--  llvm/lib/Target/ARM/ARMLegalizerInfo.cpp                 |   4
-rw-r--r--  llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp              |   2
-rw-r--r--  llvm/lib/Target/ARM/ARMSubtarget.h                       |  35
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.h             |   2
-rw-r--r--  llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp           |  31
-rw-r--r--  llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp     |   4
-rw-r--r--  llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp   |  46
16 files changed, 203 insertions(+), 175 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 48eba2246c5..20a61d343b3 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -32,12 +32,40 @@ def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat", // // Floating Point, HW Division and Neon Support -def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", - "Enable VFP2 instructions">; +def FeatureFP64 : SubtargetFeature<"fp64", "HasFP64", "true", + "Floating point unit supports " + "double precision">; + +def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true", + "Extend FP to 32 double registers">; + +multiclass VFPver<string name, string query, string description, + list<SubtargetFeature> prev = [], + list<SubtargetFeature> otherimplies = []> { + def _D16_SP: SubtargetFeature< + name#"d16sp", query#"D16SP", "true", + description#" with only 16 d-registers and no double precision", + !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16_SP")) # otherimplies>; + def _SP: SubtargetFeature< + name#"sp", query#"SP", "true", + description#" with no double precision", + !foreach(v, prev, !cast<SubtargetFeature>(v # "_SP")) # + otherimplies # [FeatureD32, !cast<SubtargetFeature>(NAME # "_D16_SP")]>; + def _D16: SubtargetFeature< + name#"d16", query#"D16", "true", + description#" with only 16 d-registers", + !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16")) # + otherimplies # [FeatureFP64, !cast<SubtargetFeature>(NAME # "_D16_SP")]>; + def "": SubtargetFeature< + name, query, "true", description, + prev # otherimplies # [ + !cast<SubtargetFeature>(NAME # "_D16"), + !cast<SubtargetFeature>(NAME # "_SP")]>; +} -def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", - "Enable VFP3 instructions", - [FeatureVFP2]>; +defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions">; +defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions", + [FeatureVFP2]>; def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", @@ -47,31 +75,22 @@ def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision " "floating point">; -def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", - "Enable VFP4 instructions", - [FeatureVFP3, FeatureFP16]>; +defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions", + [FeatureVFP3], [FeatureFP16]>; -def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", - "true", "Enable ARMv8 FP", - [FeatureVFP4]>; +defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP", + [FeatureVFP4]>; def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", "Enable full half-precision " "floating point", - [FeatureFPARMv8]>; + [FeatureFPARMv8_D16_SP]>; def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true", "Enable full half-precision " "floating point fml instructions", [FeatureFullFP16]>; -def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", - "Floating point unit supports " - "single precision only">; - -def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", - "Restrict FP to 16 double registers">; - def FeatureHWDivThumb : SubtargetFeature<"hwdiv", "HasHardwareDivideInThumb", "true", "Enable divide instructions in Thumb">; @@ -943,14 +962,12 @@ def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, FeatureHasRetAddrStack, FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVFP3, - FeatureD16, + FeatureVFP3_D16, FeatureAvoidPartialCPSR]>; def : ProcessorModel<"cortex-r5", CortexA8Model, 
[ARMv7r, ProcR5, FeatureHasRetAddrStack, - FeatureVFP3, - FeatureD16, + FeatureVFP3_D16, FeatureSlowFPBrcc, FeatureHWDivARM, FeatureHasSlowFPVMLx, @@ -958,8 +975,7 @@ def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, FeatureHasRetAddrStack, - FeatureVFP3, - FeatureD16, + FeatureVFP3_D16, FeatureFP16, FeatureMP, FeatureSlowFPBrcc, @@ -969,8 +985,7 @@ def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r, FeatureHasRetAddrStack, - FeatureVFP3, - FeatureD16, + FeatureVFP3_D16, FeatureFP16, FeatureMP, FeatureSlowFPBrcc, @@ -991,10 +1006,8 @@ def : ProcessorModel<"sc300", CortexM4Model, [ARMv7m, FeatureUseAA, FeatureHasNoBranchPredictor]>; -def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em, - FeatureVFP4, - FeatureVFPOnlySP, - FeatureD16, +def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em, + FeatureVFP4_D16_SP, FeaturePrefLoopAlign32, FeatureHasSlowFPVMLx, FeatureUseMISched, @@ -1002,17 +1015,14 @@ def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em, FeatureHasNoBranchPredictor]>; def : ProcNoItin<"cortex-m7", [ARMv7em, - FeatureFPARMv8, - FeatureD16]>; + FeatureFPARMv8_D16]>; def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, FeatureNoMovt]>; def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline, FeatureDSP, - FeatureFPARMv8, - FeatureD16, - FeatureVFPOnlySP, + FeatureFPARMv8_D16_SP, FeaturePrefLoopAlign32, FeatureHasSlowFPVMLx, FeatureUseMISched, @@ -1021,9 +1031,7 @@ def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline, def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline, FeatureDSP, - FeatureFPARMv8, - FeatureD16, - FeatureVFPOnlySP, + FeatureFPARMv8_D16_SP, FeaturePrefLoopAlign32, FeatureHasSlowFPVMLx, FeatureUseMISched, diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 239b95ffb8e..6bede80adaa 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -657,7 +657,7 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::IEEEDenormals); else { - if (!STI.hasVFP2()) { + if (!STI.hasVFP2Base()) { // When the target doesn't have an FPU (by design or // intention), the assumptions made on the software support // mirror that of the equivalent hardware support *if it @@ -667,7 +667,7 @@ void ARMAsmPrinter::emitAttributes() { if (STI.hasV7Ops()) ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::PreserveFPSign); - } else if (STI.hasVFP3()) { + } else if (STI.hasVFP3Base()) { // In VFPv4, VFPv4U, VFPv3, or VFPv3U, it is preserved. That is, // the sign bit of the zero matches the sign bit of the input or // result that is being flushed to zero. 
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 22c53d9e26c..fbef5d790a4 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -133,7 +133,7 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, ScheduleHazardRecognizer *ARMBaseInstrInfo:: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const { - if (Subtarget.isThumb2() || Subtarget.hasVFP2()) + if (Subtarget.isThumb2() || Subtarget.hasVFP2Base()) return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG); return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); } @@ -830,7 +830,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = ARM::VMOVRS; else if (SPRDest && GPRSrc) Opc = ARM::VMOVSR; - else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP()) + else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64()) Opc = ARM::VMOVD; else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) Opc = ARM::VORRq; @@ -890,7 +890,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BeginIdx = ARM::dsub_0; SubRegs = 4; Spacing = 2; - } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) { + } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && + !Subtarget.hasFP64()) { Opc = ARM::VMOVS; BeginIdx = ARM::ssub_0; SubRegs = 2; @@ -1481,7 +1482,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { // copyPhysReg() calls. Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be // changed into a VORR that can go down the NEON pipeline. - if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP()) + if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64()) return false; // Look for a copy between even S-registers. That is where we keep floats diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 1870e4c0b7f..96200a09109 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -149,7 +149,7 @@ ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const { const uint32_t * ARMBaseRegisterInfo::getSjLjDispatchPreservedMask(const MachineFunction &MF) const { const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); - if (!STI.useSoftFloat() && STI.hasVFP2() && !STI.isThumb1Only()) + if (!STI.useSoftFloat() && STI.hasVFP2Base() && !STI.isThumb1Only()) return CSR_NoRegs_RegMask; else return CSR_FPRegs_RegMask; @@ -193,7 +193,7 @@ getReservedRegs(const MachineFunction &MF) const { if (STI.isR9Reserved()) markSuperRegs(Reserved, ARM::R9); // Reserve D16-D31 if the subtarget doesn't support them. - if (!STI.hasVFP3() || STI.hasD16()) { + if (!STI.hasD32()) { static_assert(ARM::D31 == ARM::D16 + 15, "Register list not consecutive!"); for (unsigned R = 0; R < 16; ++R) markSuperRegs(Reserved, ARM::D16 + R); diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp index cd01b70b378..6e274d269bf 100644 --- a/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -441,7 +441,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) { } // Require VFP2 for loading fp constants. 
- if (!Subtarget->hasVFP2()) return false; + if (!Subtarget->hasVFP2Base()) return false; // MachineConstantPool wants an explicit alignment. unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); @@ -969,7 +969,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; break; case MVT::f32: - if (!Subtarget->hasVFP2()) return false; + if (!Subtarget->hasVFP2Base()) return false; // Unaligned loads need special handling. Floats require word-alignment. if (Alignment && Alignment < 4) { needVMOV = true; @@ -982,7 +982,8 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, } break; case MVT::f64: - if (!Subtarget->hasVFP2()) return false; + // Can load and store double precision even without FeatureFP64 + if (!Subtarget->hasVFP2Base()) return false; // FIXME: Unaligned loads need special handling. Doublewords require // word-alignment. if (Alignment && Alignment < 4) @@ -1107,7 +1108,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, } break; case MVT::f32: - if (!Subtarget->hasVFP2()) return false; + if (!Subtarget->hasVFP2Base()) return false; // Unaligned stores need special handling. Floats require word-alignment. if (Alignment && Alignment < 4) { unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32)); @@ -1122,7 +1123,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, } break; case MVT::f64: - if (!Subtarget->hasVFP2()) return false; + // Can load and store double precision even without FeatureFP64 + if (!Subtarget->hasVFP2Base()) return false; // FIXME: Unaligned stores need special handling. Doublewords require // word-alignment. if (Alignment && Alignment < 4) @@ -1353,10 +1355,10 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); - if (Ty->isFloatTy() && !Subtarget->hasVFP2()) + if (Ty->isFloatTy() && !Subtarget->hasVFP2Base()) return false; - if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP())) + if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64())) return false; // Check to see if the 2nd operand is a constant that we can encode directly @@ -1506,7 +1508,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { bool ARMFastISel::SelectFPExt(const Instruction *I) { // Make sure we have VFP and that we're extending float to double. - if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false; + if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false; Value *V = I->getOperand(0); if (!I->getType()->isDoubleTy() || @@ -1525,7 +1527,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) { bool ARMFastISel::SelectFPTrunc(const Instruction *I) { // Make sure we have VFP and that we're truncating double to float. - if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false; + if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false; Value *V = I->getOperand(0); if (!(I->getType()->isFloatTy() && @@ -1544,7 +1546,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) { bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { // Make sure we have VFP. - if (!Subtarget->hasVFP2()) return false; + if (!Subtarget->hasVFP2Base()) return false; MVT DstVT; Type *Ty = I->getType(); @@ -1576,7 +1578,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { unsigned Opc; if (Ty->isFloatTy()) Opc = isSigned ? 
ARM::VSITOS : ARM::VUITOS; - else if (Ty->isDoubleTy() && !Subtarget->isFPOnlySP()) + else if (Ty->isDoubleTy() && Subtarget->hasFP64()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD; else return false; @@ -1589,7 +1591,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) { // Make sure we have VFP. - if (!Subtarget->hasVFP2()) return false; + if (!Subtarget->hasVFP2Base()) return false; MVT DstVT; Type *RetTy = I->getType(); @@ -1602,7 +1604,7 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) { unsigned Opc; Type *OpTy = I->getOperand(0)->getType(); if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS; - else if (OpTy->isDoubleTy() && !Subtarget->isFPOnlySP()) + else if (OpTy->isDoubleTy() && Subtarget->hasFP64()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD; else return false; @@ -1808,9 +1810,9 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { // if we have them. // FIXME: It'd be nice to use NEON instructions. Type *Ty = I->getType(); - if (Ty->isFloatTy() && !Subtarget->hasVFP2()) + if (Ty->isFloatTy() && !Subtarget->hasVFP2Base()) return false; - if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP())) + if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64())) return false; unsigned Opc; @@ -1852,7 +1854,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, default: report_fatal_error("Unsupported calling convention"); case CallingConv::Fast: - if (Subtarget->hasVFP2() && !isVarArg) { + if (Subtarget->hasVFP2Base() && !isVarArg) { if (!Subtarget->isAAPCS_ABI()) return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); // For AAPCS ABI targets, just use VFP variant of the calling convention. @@ -1863,7 +1865,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, case CallingConv::CXX_FAST_TLS: // Use target triple & subtarget features to do actual dispatch. if (Subtarget->isAAPCS_ABI()) { - if (Subtarget->hasVFP2() && + if (Subtarget->hasVFP2Base() && TM.Options.FloatABIType == FloatABI::Hard && !isVarArg) return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); else @@ -1932,11 +1934,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, case MVT::i32: break; case MVT::f32: - if (!Subtarget->hasVFP2()) + if (!Subtarget->hasVFP2Base()) return false; break; case MVT::f64: - if (!Subtarget->hasVFP2()) + if (!Subtarget->hasVFP2Base()) return false; break; } diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index cb66d16a194..492c83c2bf7 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -4043,9 +4043,9 @@ bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){ // If an opcode was found then we can lower the read to a VFP instruction. 
if (Opcode) { - if (!Subtarget->hasVFP2()) + if (!Subtarget->hasVFP2Base()) return false; - if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8()) + if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base()) return false; Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), @@ -4154,7 +4154,7 @@ bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ .Default(0); if (Opcode) { - if (!Subtarget->hasVFP2()) + if (!Subtarget->hasVFP2Base()) return false; Ops = { N->getOperand(2), getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 88d318e7bb3..7dd2fef89ee 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -241,7 +241,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (Subtarget->isTargetMachO()) { // Uses VFP for Thumb libfuncs if available. - if (Subtarget->isThumb() && Subtarget->hasVFP2() && + if (Subtarget->isThumb() && Subtarget->hasVFP2Base() && Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) { static const struct { const RTLIB::Libcall Op; @@ -510,7 +510,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, else addRegisterClass(MVT::i32, &ARM::GPRRegClass); - if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && + if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, &ARM::SPRRegClass); addRegisterClass(MVT::f64, &ARM::DPRRegClass); @@ -698,7 +698,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); // NEON only has FMA instructions as of VFP4. - if (!Subtarget->hasVFP4()) { + if (!Subtarget->hasVFP4Base()) { setOperationAction(ISD::FMA, MVT::v2f32, Expand); setOperationAction(ISD::FMA, MVT::v4f32, Expand); } @@ -732,7 +732,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } } - if (Subtarget->isFPOnlySP()) { + if (!Subtarget->hasFP64()) { // When targeting a floating-point unit with only single-precision // operations, f64 is legal for the few double-precision instructions which // are present However, no double-precision operations other than moves, @@ -1030,7 +1030,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && + if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only()) { // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR // iff target supports vfp2. 
@@ -1080,7 +1080,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); - if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && + if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); @@ -1088,7 +1088,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); - if (!Subtarget->hasVFP4()) { + if (!Subtarget->hasVFP4Base()) { setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); } @@ -1096,7 +1096,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // Various VFP goodness if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) { // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. - if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) { + if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) { setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); } @@ -1116,7 +1116,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } // FP-ARMv8 implements a lot of rounding-like FP operations. - if (Subtarget->hasFPARMv8()) { + if (Subtarget->hasFPARMv8Base()) { setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Legal); @@ -1130,7 +1130,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); - if (!Subtarget->isFPOnlySP()) { + if (Subtarget->hasFP64()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FROUND, MVT::f64, Legal); @@ -1202,7 +1202,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setStackPointerRegisterToSaveRestore(ARM::SP); if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() || - !Subtarget->hasVFP2() || Subtarget->hasMinSize()) + !Subtarget->hasVFP2Base() || Subtarget->hasMinSize()) setSchedulingPreference(Sched::RegPressure); else setSchedulingPreference(Sched::Hybrid); @@ -1637,7 +1637,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) return CallingConv::ARM_APCS; - else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && + else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && !isVarArg) return CallingConv::ARM_AAPCS_VFP; @@ -1646,10 +1646,11 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, case CallingConv::Fast: case CallingConv::CXX_FAST_TLS: if (!Subtarget->isAAPCS_ABI()) { - if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) + if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg) return CallingConv::Fast; return CallingConv::ARM_APCS; - } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) + } else if (Subtarget->hasVFP2Base() && + !Subtarget->isThumb1Only() && !isVarArg) return CallingConv::ARM_AAPCS_VFP; else return CallingConv::ARM_AAPCS; @@ -3912,7 +3913,7 @@ SDValue 
ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl, bool InvalidOnQNaN) const { - assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64); + assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64); SDValue Cmp; SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32); if (!isFloatingPointZero(RHS)) @@ -4225,7 +4226,7 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal, SDValue ARMcc, SDValue CCR, SDValue Cmp, SelectionDAG &DAG) const { - if (Subtarget->isFPOnlySP() && VT == MVT::f64) { + if (!Subtarget->hasFP64() && VT == MVT::f64) { FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), FalseVal); TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl, @@ -4474,7 +4475,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue TrueVal = Op.getOperand(2); SDValue FalseVal = Op.getOperand(3); - if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { + if (!Subtarget->hasFP64() && LHS.getValueType() == MVT::f64) { DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, dl); @@ -4497,9 +4498,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // inverting the compare condition, swapping 'less' and 'greater') and // sometimes need to swap the operands to the VSEL (which inverts the // condition in the sense of firing whenever the previous condition didn't) - if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f16 || - TrueVal.getValueType() == MVT::f32 || - TrueVal.getValueType() == MVT::f64)) { + if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 || + TrueVal.getValueType() == MVT::f32 || + TrueVal.getValueType() == MVT::f64)) { ARMCC::CondCodes CondCode = IntCCToARMCC(CC); if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || CondCode == ARMCC::VC || CondCode == ARMCC::NE) { @@ -4522,7 +4523,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we // must use VSEL (limited condition codes), due to not having conditional f16 // moves. 
- if (Subtarget->hasFPARMv8() && + if (Subtarget->hasFPARMv8Base() && !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) && (TrueVal.getValueType() == MVT::f16 || TrueVal.getValueType() == MVT::f32 || @@ -4715,7 +4716,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(4); SDLoc dl(Op); - if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { + if (!Subtarget->hasFP64() && LHS.getValueType() == MVT::f64) { DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, dl); @@ -4862,7 +4863,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorFP_TO_INT(Op, DAG); - if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) { + if (!Subtarget->hasFP64() && Op.getOperand(0).getValueType() == MVT::f64) { RTLIB::Libcall LC; if (Op.getOpcode() == ISD::FP_TO_SINT) LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), @@ -4926,7 +4927,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorINT_TO_FP(Op, DAG); - if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) { + if (!Subtarget->hasFP64() && Op.getValueType() == MVT::f64) { RTLIB::Libcall LC; if (Op.getOpcode() == ISD::SINT_TO_FP) LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), @@ -5909,12 +5910,12 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, } } - if (!ST->hasVFP3()) + if (!ST->hasVFP3Base()) return SDValue(); // Use the default (constant pool) lowering for double constants when we have // an SP-only FPU - if (IsDouble && Subtarget->isFPOnlySP()) + if (IsDouble && !Subtarget->hasFP64()) return SDValue(); // Try splatting with a VMOV.f32... @@ -11356,7 +11357,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, const ARMSubtarget *Subtarget) { // vmovrrd(vmovdrr x, y) -> x,y SDValue InDouble = N->getOperand(0); - if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP()) + if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64()) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); // vmovrrd(load f64) -> (load i32), (load i32) @@ -13303,7 +13304,7 @@ static bool isLegalT2AddressImmediate(int64_t V, EVT VT, unsigned NumBytes = std::max(VT.getSizeInBits() / 8, 1U); // VLDR and LDRD: 4 * imm8 - if ((VT.isFloatingPoint() && Subtarget->hasVFP2()) || NumBytes == 8) + if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8) return isShiftedUInt<8, 2>(V); if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) { @@ -13347,7 +13348,7 @@ static bool isLegalAddressImmediate(int64_t V, EVT VT, return isUInt<8>(V); case MVT::f32: case MVT::f64: - if (!Subtarget->hasVFP2()) // FIXME: NEON? + if (!Subtarget->hasVFP2Base()) // FIXME: NEON? return false; return isShiftedUInt<8, 2>(V); } @@ -13910,7 +13911,7 @@ const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const { // Although we are correct (we are free to emit anything, without // constraints), we might break use cases that would expect us to be more // efficient and emit something else. 
- if (!Subtarget->hasVFP2()) + if (!Subtarget->hasVFP2Base()) return "r"; if (ConstraintVT.isFloatingPoint()) return "w"; @@ -14392,7 +14393,7 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const } SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { - assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() && + assert(Op.getValueType() == MVT::f64 && !Subtarget->hasFP64() && "Unexpected type for custom-lowering FP_EXTEND"); RTLIB::Libcall LC; @@ -14404,8 +14405,7 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { } SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { - assert(Op.getOperand(0).getValueType() == MVT::f64 && - Subtarget->isFPOnlySP() && + assert(Op.getOperand(0).getValueType() == MVT::f64 && !Subtarget->hasFP64() && "Unexpected type for custom-lowering FP_ROUND"); RTLIB::Libcall LC; @@ -14468,13 +14468,13 @@ bool ARM::isBitFieldInvertedMask(unsigned v) { /// materialize the FP immediate as a load from a constant pool. bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { - if (!Subtarget->hasVFP3()) + if (!Subtarget->hasVFP3Base()) return false; if (VT == MVT::f16 && Subtarget->hasFullFP16()) return ARM_AM::getFP16Imm(Imm) != -1; if (VT == MVT::f32) return ARM_AM::getFP32Imm(Imm) != -1; - if (VT == MVT::f64 && !Subtarget->isFPOnlySP()) + if (VT == MVT::f64 && Subtarget->hasFP64()) return ARM_AM::getFP64Imm(Imm) != -1; return false; } diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index f55e73abbd7..d0821b94477 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -258,18 +258,18 @@ def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">, AssemblerPredicate<"HasV8_4aOps", "armv8.4a">; def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">, AssemblerPredicate<"HasV8_5aOps", "armv8.5a">; -def NoVFP : Predicate<"!Subtarget->hasVFP2()">; -def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, - AssemblerPredicate<"FeatureVFP2", "VFP2">; -def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, - AssemblerPredicate<"FeatureVFP3", "VFP3">; -def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, - AssemblerPredicate<"FeatureVFP4", "VFP4">; -def HasDPVFP : Predicate<"!Subtarget->isFPOnlySP()">, - AssemblerPredicate<"!FeatureVFPOnlySP", +def NoVFP : Predicate<"!Subtarget->hasVFP2Base()">; +def HasVFP2 : Predicate<"Subtarget->hasVFP2Base()">, + AssemblerPredicate<"FeatureVFP2_D16_SP", "VFP2">; +def HasVFP3 : Predicate<"Subtarget->hasVFP3Base()">, + AssemblerPredicate<"FeatureVFP3_D16_SP", "VFP3">; +def HasVFP4 : Predicate<"Subtarget->hasVFP4Base()">, + AssemblerPredicate<"FeatureVFP4_D16_SP", "VFP4">; +def HasDPVFP : Predicate<"Subtarget->hasFP64()">, + AssemblerPredicate<"FeatureFP64", "double precision VFP">; -def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, - AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">; +def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8Base()">, + AssemblerPredicate<"FeatureFPARMv8_D16_SP", "FPARMv8">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON", "NEON">; def HasSHA2 : Predicate<"Subtarget->hasSHA2()">, @@ -371,7 +371,7 @@ def UseMulOps : Predicate<"Subtarget->useMulOps()">; // Do not use them for Darwin platforms. 
def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" " FPOpFusion::Fast && " - " Subtarget->hasVFP4()) && " + " Subtarget->hasVFP4Base()) && " "!Subtarget->isTargetDarwin() &&" "Subtarget->useFPVMLx()">; diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp index b97924cf975..4485a474a6d 100644 --- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -232,7 +232,7 @@ static bool selectMergeValues(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { - assert(TII.getSubtarget().hasVFP2() && "Can't select merge without VFP"); + assert(TII.getSubtarget().hasVFP2Base() && "Can't select merge without VFP"); // We only support G_MERGE_VALUES as a way to stick together two scalar GPRs // into one DPR. @@ -263,7 +263,8 @@ static bool selectUnmergeValues(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) { - assert(TII.getSubtarget().hasVFP2() && "Can't select unmerge without VFP"); + assert(TII.getSubtarget().hasVFP2Base() && + "Can't select unmerge without VFP"); // We only support G_UNMERGE_VALUES as a way to break up one DPR into two // GPRs. @@ -1036,12 +1037,12 @@ bool ARMInstructionSelector::select(MachineInstr &I, return selectCmp(Helper, MIB, MRI); } case G_FCMP: { - assert(STI.hasVFP2() && "Can't select fcmp without VFP"); + assert(STI.hasVFP2Base() && "Can't select fcmp without VFP"); unsigned OpReg = I.getOperand(2).getReg(); unsigned Size = MRI.getType(OpReg).getSizeInBits(); - if (Size == 64 && STI.isFPOnlySP()) { + if (Size == 64 && !STI.hasFP64()) { LLVM_DEBUG(dbgs() << "Subtarget only supports single precision"); return false; } @@ -1087,7 +1088,7 @@ bool ARMInstructionSelector::select(MachineInstr &I, LLT ValTy = MRI.getType(Reg); const auto ValSize = ValTy.getSizeInBits(); - assert((ValSize != 64 || STI.hasVFP2()) && + assert((ValSize != 64 || STI.hasVFP2Base()) && "Don't know how to load/store 64-bit value without VFP"); const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize); diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index 8f2029312d2..458cafdc7a5 100644 --- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -157,7 +157,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { getActionDefinitionsBuilder(G_BRCOND).legalFor({s1}); - if (!ST.useSoftFloat() && ST.hasVFP2()) { + if (!ST.useSoftFloat() && ST.hasVFP2Base()) { getActionDefinitionsBuilder( {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FCONSTANT, G_FNEG}) .legalFor({s32, s64}); @@ -208,7 +208,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { .libcallForCartesianProduct({s32, s64}, {s32}); } - if (!ST.useSoftFloat() && ST.hasVFP4()) + if (!ST.useSoftFloat() && ST.hasVFP4Base()) getActionDefinitionsBuilder(G_FMA).legalFor({s32, s64}); else getActionDefinitionsBuilder(G_FMA).libcallFor({s32, s64}); diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp index d03b482043e..4566ac2c9dd 100644 --- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -453,7 +453,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { for (const auto &Mapping : OperandsMapping[i]) { assert( (Mapping.RegBank->getID() != ARM::FPRRegBankID || - 
MF.getSubtarget<ARMSubtarget>().hasVFP2()) && + MF.getSubtarget<ARMSubtarget>().hasVFP2Base()) && "Trying to use floating point register bank on target without vfp"); } } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 9500a9faf4e..abedc6f6d81 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -166,6 +166,21 @@ protected: bool HasFPARMv8 = false; bool HasNEON = false; + /// Versions of the VFP flags restricted to single precision, or to + /// 16 d-registers, or both. + bool HasVFPv2SP = false; + bool HasVFPv3SP = false; + bool HasVFPv4SP = false; + bool HasFPARMv8SP = false; + bool HasVFPv2D16 = false; + bool HasVFPv3D16 = false; + bool HasVFPv4D16 = false; + bool HasFPARMv8D16 = false; + bool HasVFPv2D16SP = false; + bool HasVFPv3D16SP = false; + bool HasVFPv4D16SP = false; + bool HasFPARMv8D16SP = false; + /// HasDotProd - True if the ARMv8.2A dot product instructions are supported. bool HasDotProd = false; @@ -232,9 +247,9 @@ protected: /// HasFP16FML - True if subtarget supports half-precision FP fml operations bool HasFP16FML = false; - /// HasD16 - True if subtarget is limited to 16 double precision + /// HasD32 - True if subtarget has the full 32 double precision /// FP registers for VFPv3. - bool HasD16 = false; + bool HasD32 = false; /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode bool HasHardwareDivideInThumb = false; @@ -291,9 +306,9 @@ protected: /// extension. bool HasVirtualization = false; - /// FPOnlySP - If true, the floating point unit only supports single + /// HasFP64 - If true, the floating point unit supports double /// precision. - bool FPOnlySP = false; + bool HasFP64 = false; /// If true, the processor supports the Performance Monitor Extensions. 
These /// include a generic cycle-counter as well as more fine-grained (often @@ -569,10 +584,10 @@ public: bool hasARMOps() const { return !NoARM; } - bool hasVFP2() const { return HasVFPv2; } - bool hasVFP3() const { return HasVFPv3; } - bool hasVFP4() const { return HasVFPv4; } - bool hasFPARMv8() const { return HasFPARMv8; } + bool hasVFP2Base() const { return HasVFPv2D16SP; } + bool hasVFP3Base() const { return HasVFPv3D16SP; } + bool hasVFP4Base() const { return HasVFPv4D16SP; } + bool hasFPARMv8Base() const { return HasFPARMv8D16SP; } bool hasNEON() const { return HasNEON; } bool hasSHA2() const { return HasSHA2; } bool hasAES() const { return HasAES; } @@ -601,7 +616,7 @@ public: bool useFPVMLx() const { return !SlowFPVMLx; } bool hasVMLxForwarding() const { return HasVMLxForwarding; } bool isFPBrccSlow() const { return SlowFPBrcc; } - bool isFPOnlySP() const { return FPOnlySP; } + bool hasFP64() const { return HasFP64; } bool hasPerfMon() const { return HasPerfMon; } bool hasTrustZone() const { return HasTrustZone; } bool has8MSecExt() const { return Has8MSecExt; } @@ -638,7 +653,7 @@ public: bool genExecuteOnly() const { return GenExecuteOnly; } bool hasFP16() const { return HasFP16; } - bool hasD16() const { return HasD16; } + bool hasD32() const { return HasD32; } bool hasFullFP16() const { return HasFullFP16; } bool hasFP16FML() const { return HasFP16FML; } diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 2fbcd8b2ba6..882a63c33a5 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -48,7 +48,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { const ARMTargetLowering *TLI; // Currently the following features are excluded from InlineFeatureWhitelist. - // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16 + // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32 // Depending on whether they are set or unset, different // instructions/registers are available. For example, inlining a callee with // -thumb-mode in a caller with +thumb-mode, may cause the assembler to diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index f4af747f3ee..f8a00f713e4 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -492,8 +492,8 @@ class ARMAsmParser : public MCTargetAsmParser { return getSTI().getFeatureBits()[ARM::FeatureDSP]; } - bool hasD16() const { - return getSTI().getFeatureBits()[ARM::FeatureD16]; + bool hasD32() const { + return getSTI().getFeatureBits()[ARM::FeatureD32]; } bool hasV8_1aOps() const { @@ -3424,7 +3424,7 @@ int ARMAsmParser::tryParseRegister() { } // Some FPUs only have 16 D registers, so D16-D31 are invalid - if (hasD16() && RegNum >= ARM::D16 && RegNum <= ARM::D31) + if (!hasD32() && RegNum >= ARM::D16 && RegNum <= ARM::D31) return -1; Parser.Lex(); // Eat identifier token. @@ -10415,11 +10415,11 @@ ARMAsmParser::getCustomOperandDiag(ARMMatchResultTy MatchError) { : "operand must be a register in range [r0, r12] or r14"; // DPR contains 16 registers for some FPUs, and 32 for others. case Match_DPR: - return hasD16() ? "operand must be a register in range [d0, d15]" - : "operand must be a register in range [d0, d31]"; + return hasD32() ? "operand must be a register in range [d0, d31]" + : "operand must be a register in range [d0, d15]"; case Match_DPR_RegList: - return hasD16() ? 
"operand must be a list of registers in range [d0, d15]" - : "operand must be a list of registers in range [d0, d31]"; + return hasD32() ? "operand must be a list of registers in range [d0, d31]" + : "operand must be a list of registers in range [d0, d15]"; // For all other diags, use the static string from tablegen. default: @@ -10621,14 +10621,15 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { { ARM::AEK_CRC, {Feature_HasV8Bit}, {ARM::FeatureCRC} }, { ARM::AEK_CRYPTO, {Feature_HasV8Bit}, {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} }, - { ARM::AEK_FP, {Feature_HasV8Bit}, {ARM::FeatureFPARMv8} }, + { ARM::AEK_FP, {Feature_HasV8Bit}, + {ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} }, { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM), {Feature_HasV7Bit, Feature_IsNotMClassBit}, {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} }, { ARM::AEK_MP, {Feature_HasV7Bit, Feature_IsNotMClassBit}, {ARM::FeatureMP} }, { ARM::AEK_SIMD, {Feature_HasV8Bit}, - {ARM::FeatureNEON, ARM::FeatureFPARMv8} }, + {ARM::FeatureNEON, ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} }, { ARM::AEK_SEC, {Feature_HasV6KBit}, {ARM::FeatureTrustZone} }, // FIXME: Only available in A-class, isel not predicated { ARM::AEK_VIRT, {Feature_HasV7Bit}, {ARM::FeatureVirtualization} }, @@ -10678,12 +10679,12 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { "allowed for the current base architecture"); MCSubtargetInfo &STI = copySTI(); - FeatureBitset ToggleFeatures = EnableFeature - ? (~STI.getFeatureBits() & Extension.Features) - : ( STI.getFeatureBits() & Extension.Features); - - FeatureBitset Features = - ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures)); + if (EnableFeature) { + STI.SetFeatureBitsTransitively(Extension.Features); + } else { + STI.ClearFeatureBitsTransitively(Extension.Features); + } + FeatureBitset Features = ComputeAvailableFeatures(STI.getFeatureBits()); setAvailableFeatures(Features); return false; } diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index d4b2be7d381..6948f7af469 100644 --- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -1043,9 +1043,9 @@ static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, const FeatureBitset &featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits(); - bool hasD16 = featureBits[ARM::FeatureD16]; + bool hasD32 = featureBits[ARM::FeatureD32]; - if (RegNo > 31 || (hasD16 && RegNo > 15)) + if (RegNo > 31 || (!hasD32 && RegNo > 15)) return MCDisassembler::Fail; unsigned Register = DPRDecoderTable[RegNo]; diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index 8f9c66507a4..9502a5d7c39 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -222,37 +222,37 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { ? ARMBuildAttrs::AllowNeonARMv8_1a : ARMBuildAttrs::AllowNeonARMv8); } else { - if (STI.hasFeature(ARM::FeatureFPARMv8)) + if (STI.hasFeature(ARM::FeatureFPARMv8_D16_SP)) // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one // FPU, but there are two different names for it depending on the CPU. - emitFPU(STI.hasFeature(ARM::FeatureD16) - ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? 
ARM::FK_FPV5_SP_D16 - : ARM::FK_FPV5_D16) - : ARM::FK_FP_ARMV8); - else if (STI.hasFeature(ARM::FeatureVFP4)) - emitFPU(STI.hasFeature(ARM::FeatureD16) - ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16 - : ARM::FK_VFPV4_D16) - : ARM::FK_VFPV4); - else if (STI.hasFeature(ARM::FeatureVFP3)) + emitFPU(STI.hasFeature(ARM::FeatureD32) + ? ARM::FK_FP_ARMV8 + : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_FPV5_D16 + : ARM::FK_FPV5_SP_D16)); + else if (STI.hasFeature(ARM::FeatureVFP4_D16_SP)) + emitFPU(STI.hasFeature(ARM::FeatureD32) + ? ARM::FK_VFPV4 + : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_VFPV4_D16 + : ARM::FK_FPV4_SP_D16)); + else if (STI.hasFeature(ARM::FeatureVFP3_D16_SP)) emitFPU( - STI.hasFeature(ARM::FeatureD16) - // +d16 - ? (STI.hasFeature(ARM::FeatureVFPOnlySP) - ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16 - : ARM::FK_VFPV3XD) - : (STI.hasFeature(ARM::FeatureFP16) + STI.hasFeature(ARM::FeatureD32) + // +d32 + ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16 + : ARM::FK_VFPV3) + // -d32 + : (STI.hasFeature(ARM::FeatureFP64) + ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_D16_FP16 - : ARM::FK_VFPV3_D16)) - // -d16 - : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16 - : ARM::FK_VFPV3)); - else if (STI.hasFeature(ARM::FeatureVFP2)) + : ARM::FK_VFPV3_D16) + : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16 + : ARM::FK_VFPV3XD))); + else if (STI.hasFeature(ARM::FeatureVFP2_D16_SP)) emitFPU(ARM::FK_VFPV2); } // ABI_HardFP_use attribute to indicate single precision FP. - if (STI.hasFeature(ARM::FeatureVFPOnlySP)) + if (STI.hasFeature(ARM::FeatureVFP2_D16_SP) && !STI.hasFeature(ARM::FeatureFP64)) emitAttribute(ARMBuildAttrs::ABI_HardFP_use, ARMBuildAttrs::HardFPSinglePrecision); |