author     Justin Hibbits <jrh29@alumni.cwru.edu>  2018-07-18 04:25:10 +0000
committer  Justin Hibbits <jrh29@alumni.cwru.edu>  2018-07-18 04:25:10 +0000
commit     d52990c71b6f83c9beeba4efc9103412e0416ba9
tree       febc0bdc8253b60c767bc182970d894684843e06 /llvm/lib/Target/PowerPC/PPCISelLowering.cpp
parent     4fa4fa6a73a7829494df8272ef49825fb2066d1a
Introduce codegen for the Signal Processing Engine
Summary:
The Signal Processing Engine (SPE) is found on NXP/Freescale e500v1,
e500v2, and several e200 cores. This adds support targeting the e500v2,
as it is more common than the e500v1 and appears in SoCs that are still
on the market.
This patch is very intrusive because the SPE is binary incompatible with
the traditional FPU. After discussing with others, the cleanest
solution was to make both SPE and FPU features on top of a base PowerPC
subset, so all FPU instructions are now wrapped with HasFPU predicates.
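To make that layering concrete, here is a minimal standalone C++ sketch (not LLVM code; the struct and helper are hypothetical) of the idea that FPU and SPE are independent features over a common base, so every floating-point lowering decision has to pick between them. The register-class names mirror the ones used in the diff below.

```cpp
#include <cstdio>

// Hypothetical stand-ins for the subtarget feature bits.
struct Subtarget {
  bool hasFPU; // classic FPRs and fadd/fmadd/... instructions
  bool hasSPE; // GPR-based efsadd/efdadd/... instructions
};

// Illustrative helper: which register file carries an f64 value.
const char *f64RegClassFor(const Subtarget &ST) {
  if (ST.hasSPE) return "SPERC (64-bit view of the GPRs)";
  if (ST.hasFPU) return "F8RC (classic FPRs)";
  return "none (soft float)";
}

int main() {
  Subtarget e500v2{/*hasFPU=*/false, /*hasSPE=*/true};
  Subtarget classic{/*hasFPU=*/true, /*hasSPE=*/false};
  std::printf("e500v2  f64 lives in: %s\n", f64RegClassFor(e500v2));
  std::printf("classic f64 lives in: %s\n", f64RegClassFor(classic));
}
```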
Supported by this are:
* Code generation following the SPE ABI at the LLVM IR level (calling
  conventions); a standalone sketch of the resulting f64 register split
  follows this list
* Single- and double-precision math at the level supported by the APU.
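On the calling-convention point: under the SPE ABI an f64 value is reported to the calling-convention code as two i32 registers (see getNumRegistersForCallingConv and getRegisterTypeForCallingConv in the diff below). The standalone sketch here is not the backend's code; it only shows the word split, and which GPR pair ends up holding the halves is an ABI detail the lowering handles.

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double d = 3.14159;

  // View the IEEE-754 bit pattern of the double.
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof(bits));

  // Split into the two 32-bit words that travel in a GPR pair under the
  // SPE ABI; on big-endian PowerPC the high word goes in the lower register.
  uint32_t hi = static_cast<uint32_t>(bits >> 32);
  uint32_t lo = static_cast<uint32_t>(bits);

  std::printf("f64 %.5f -> high word 0x%08x, low word 0x%08x\n", d, hi, lo);
}
```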
Still to do:
* Vector operations
* SPE intrinsics
As this changes the callee-saved register list order, one test that
checks the exact generated code was updated to account for the new
register order.
Reviewed by: nemanjai
Differential Revision: https://reviews.llvm.org/D44830
llvm-svn: 337347
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 117
1 file changed, 93 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9621a8ce91f..48355494afe 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -137,8 +137,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   // Set up the register classes.
   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
   if (!useSoftFloat()) {
-    addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
-    addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
+    if (hasSPE()) {
+      addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
+      addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
+    } else {
+      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
+      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
+    }
   }
 
   // Match BITREVERSE to customized fast code sequence in the td file.
@@ -162,15 +167,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
-  setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
-  setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
-  setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
-  setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
+  if (!Subtarget.hasSPE()) {
+    setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
+    setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
+    setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
+    setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
+  }
 
   // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
@@ -266,13 +273,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
   setOperationAction(ISD::FREM , MVT::f64, Expand);
   setOperationAction(ISD::FPOW , MVT::f64, Expand);
-  setOperationAction(ISD::FMA  , MVT::f64, Legal);
   setOperationAction(ISD::FSIN , MVT::f32, Expand);
   setOperationAction(ISD::FCOS , MVT::f32, Expand);
   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
   setOperationAction(ISD::FREM , MVT::f32, Expand);
   setOperationAction(ISD::FPOW , MVT::f32, Expand);
-  setOperationAction(ISD::FMA  , MVT::f32, Legal);
+  if (Subtarget.hasSPE()) {
+    setOperationAction(ISD::FMA  , MVT::f64, Expand);
+    setOperationAction(ISD::FMA  , MVT::f32, Expand);
+  } else {
+    setOperationAction(ISD::FMA  , MVT::f64, Legal);
+    setOperationAction(ISD::FMA  , MVT::f32, Legal);
+  }
 
   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
 
@@ -355,12 +367,19 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 
-  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
-  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+  if (Subtarget.hasSPE()) {
+    // SPE has built-in conversions
+    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
+    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
+  } else {
+    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 
-  // PowerPC does not have [U|S]INT_TO_FP
-  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
-  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+    // PowerPC does not have [U|S]INT_TO_FP
+    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+  }
 
   if (Subtarget.hasDirectMove() && isPPC64) {
     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
@@ -458,6 +477,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 
   // Comparisons that require checking two conditions.
+  if (Subtarget.hasSPE()) {
+    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+  }
   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
@@ -485,7 +510,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
   } else {
     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
-    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+    if (Subtarget.hasSPE())
+      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
+    else
+      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
   }
 
   // With the instructions enabled under FPCVT, we can do everything.
@@ -1195,10 +1223,34 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
   return Align;
 }
 
+unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+                                                          EVT VT) const {
+  if (Subtarget.hasSPE() && VT == MVT::f64)
+    return 2;
+  return PPCTargetLowering::getNumRegisters(Context, VT);
+}
+
+MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+                                                     EVT VT) const {
+  if (Subtarget.hasSPE() && VT == MVT::f64)
+    return MVT::i32;
+  return PPCTargetLowering::getRegisterType(Context, VT);
+}
+
+MVT PPCTargetLowering::getRegisterTypeForCallingConv(MVT VT) const {
+  if (Subtarget.hasSPE() && VT == MVT::f64)
+    return MVT::i32;
+  return PPCTargetLowering::getRegisterType(VT);
+}
+
 bool PPCTargetLowering::useSoftFloat() const {
   return Subtarget.useSoftFloat();
 }
 
+bool PPCTargetLowering::hasSPE() const {
+  return Subtarget.hasSPE();
+}
+
 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((PPCISD::NodeType)Opcode) {
   case PPCISD::FIRST_NUMBER: break;
@@ -3362,7 +3414,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
   // Reserve space for the linkage area on the stack.
   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
-  if (useSoftFloat())
+  if (useSoftFloat() || hasSPE())
     CCInfo.PreAnalyzeFormalArguments(Ins);
 
   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -3386,12 +3438,16 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
       case MVT::f32:
         if (Subtarget.hasP8Vector())
          RC = &PPC::VSSRCRegClass;
+        else if (Subtarget.hasSPE())
+          RC = &PPC::SPE4RCRegClass;
        else
          RC = &PPC::F4RCRegClass;
        break;
      case MVT::f64:
        if (Subtarget.hasVSX())
          RC = &PPC::VSFRCRegClass;
+        else if (Subtarget.hasSPE())
+          RC = &PPC::SPERCRegClass;
        else
          RC = &PPC::F8RCRegClass;
        break;
@@ -3480,7 +3536,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    };
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
 
-    if (useSoftFloat())
+    if (useSoftFloat() || hasSPE())
      NumFPArgRegs = 0;
 
    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
@@ -10230,6 +10286,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
             MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSRC ||
+             MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
+             MI.getOpcode() == PPC::SELECT_CC_SPE ||
             MI.getOpcode() == PPC::SELECT_I4 ||
             MI.getOpcode() == PPC::SELECT_I8 ||
             MI.getOpcode() == PPC::SELECT_F4 ||
@@ -10238,6 +10296,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
             MI.getOpcode() == PPC::SELECT_QFRC ||
             MI.getOpcode() == PPC::SELECT_QSRC ||
             MI.getOpcode() == PPC::SELECT_QBRC ||
+             MI.getOpcode() == PPC::SELECT_SPE ||
+             MI.getOpcode() == PPC::SELECT_SPE4 ||
             MI.getOpcode() == PPC::SELECT_VRRC ||
             MI.getOpcode() == PPC::SELECT_VSFRC ||
             MI.getOpcode() == PPC::SELECT_VSSRC ||
@@ -10271,6 +10331,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
  if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
      MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
      MI.getOpcode() == PPC::SELECT_F16 ||
+      MI.getOpcode() == PPC::SELECT_SPE4 ||
+      MI.getOpcode() == PPC::SELECT_SPE ||
      MI.getOpcode() == PPC::SELECT_QFRC ||
      MI.getOpcode() == PPC::SELECT_QSRC ||
      MI.getOpcode() == PPC::SELECT_QBRC ||
@@ -13264,14 +13326,21 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
    // really care overly much here so just give them all the same reg classes.
    case 'd':
    case 'f':
-      if (VT == MVT::f32 || VT == MVT::i32)
-        return std::make_pair(0U, &PPC::F4RCRegClass);
-      if (VT == MVT::f64 || VT == MVT::i64)
-        return std::make_pair(0U, &PPC::F8RCRegClass);
-      if (VT == MVT::v4f64 && Subtarget.hasQPX())
-        return std::make_pair(0U, &PPC::QFRCRegClass);
-      if (VT == MVT::v4f32 && Subtarget.hasQPX())
-        return std::make_pair(0U, &PPC::QSRCRegClass);
+      if (Subtarget.hasSPE()) {
+        if (VT == MVT::f32 || VT == MVT::i32)
+          return std::make_pair(0U, &PPC::SPE4RCRegClass);
+        if (VT == MVT::f64 || VT == MVT::i64)
+          return std::make_pair(0U, &PPC::SPERCRegClass);
+      } else {
+        if (VT == MVT::f32 || VT == MVT::i32)
+          return std::make_pair(0U, &PPC::F4RCRegClass);
+        if (VT == MVT::f64 || VT == MVT::i64)
+          return std::make_pair(0U, &PPC::F8RCRegClass);
+        if (VT == MVT::v4f64 && Subtarget.hasQPX())
+          return std::make_pair(0U, &PPC::QFRCRegClass);
+        if (VT == MVT::v4f32 && Subtarget.hasQPX())
+          return std::make_pair(0U, &PPC::QSRCRegClass);
+      }
      break;
    case 'v':
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
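One detail worth calling out from the diff: for SPE, SETO and SETUO are marked Expand, meaning the target has no single instruction for ordered/unordered comparisons and the legalizer rebuilds them from other predicates. The standalone sketch below only illustrates what such a test reduces to conceptually (NaN is the only value for which x != x holds); the legalizer's actual expansion may differ.

```cpp
#include <cmath>
#include <cstdio>

// Conceptual equivalents of the SETO/SETUO condition codes.
bool ordered(double a, double b)   { return a == a && b == b; } // SETO
bool unordered(double a, double b) { return a != a || b != b; } // SETUO

int main() {
  double nan = std::nan("");
  std::printf("ordered(1.0, 2.0)   = %d\n", ordered(1.0, 2.0));   // 1
  std::printf("ordered(1.0, NaN)   = %d\n", ordered(1.0, nan));   // 0
  std::printf("unordered(1.0, NaN) = %d\n", unordered(1.0, nan)); // 1
}
```

Marking these nodes Expand rather than Custom leaves the work to the generic legalizer, which matches how the patch handles other floating-point operations SPE lacks (for example, FMA is likewise marked Expand under SPE).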