summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
diff options
context:
space:
mode:
author: Justin Hibbits <jrh29@alumni.cwru.edu> 2018-07-18 04:25:10 +0000
committer: Justin Hibbits <jrh29@alumni.cwru.edu> 2018-07-18 04:25:10 +0000
commit: d52990c71b6f83c9beeba4efc9103412e0416ba9 (patch)
tree: febc0bdc8253b60c767bc182970d894684843e06 /llvm/lib/Target/PowerPC/PPCISelLowering.cpp
parent: 4fa4fa6a73a7829494df8272ef49825fb2066d1a (diff)
download: bcm5719-llvm-d52990c71b6f83c9beeba4efc9103412e0416ba9.tar.gz
download: bcm5719-llvm-d52990c71b6f83c9beeba4efc9103412e0416ba9.zip
Introduce codegen for the Signal Processing Engine
Summary:
The Signal Processing Engine (SPE) is found on NXP/Freescale e500v1, e500v2, and several e200 cores. This adds support for targeting the e500v2, as this is more common than the e500v1, and is in SoCs still on the market.

This patch is very intrusive because the SPE is binary incompatible with the traditional FPU. After discussing with others, the cleanest solution was to make both SPE and FPU features on top of a base PowerPC subset, so all FPU instructions are now wrapped with HasFPU predicates.

Supported by this are:
* Code generation following the SPE ABI at the LLVM IR level (calling conventions)
* Single- and Double-precision math at the level supported by the APU.

Still to do:
* Vector operations
* SPE intrinsics

As this changes the Callee-saved register list order, one test, which tests the precise generated code, was updated to account for the new register order.

Reviewed by: nemanjai
Differential Revision: https://reviews.llvm.org/D44830
llvm-svn: 337347
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelLowering.cpp 117
1 files changed, 93 insertions, 24 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9621a8ce91f..48355494afe 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -137,8 +137,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// Set up the register classes.
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
if (!useSoftFloat()) {
- addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
- addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
+ if (hasSPE()) {
+ addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
+ addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
+ } else {
+ addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
+ addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
+ }
}
// Match BITREVERSE to customized fast code sequence in the td file.
@@ -162,15 +167,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
- setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
- setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
- setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
- setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
+ if (!Subtarget.hasSPE()) {
+ setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
+ }
// PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
@@ -266,13 +273,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
- setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
- setOperationAction(ISD::FMA , MVT::f32, Legal);
+ if (Subtarget.hasSPE()) {
+ setOperationAction(ISD::FMA , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Expand);
+ } else {
+ setOperationAction(ISD::FMA , MVT::f64, Legal);
+ setOperationAction(ISD::FMA , MVT::f32, Legal);
+ }
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
@@ -355,12 +367,19 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ if (Subtarget.hasSPE()) {
+ // SPE has built-in conversions
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
+ } else {
+ // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- // PowerPC does not have [U|S]INT_TO_FP
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ // PowerPC does not have [U|S]INT_TO_FP
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ }
if (Subtarget.hasDirectMove() && isPPC64) {
setOperationAction(ISD::BITCAST, MVT::f32, Legal);
@@ -458,6 +477,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
// Comparisons that require checking two conditions.
+ if (Subtarget.hasSPE()) {
+ setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+ }
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
@@ -485,7 +510,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ if (Subtarget.hasSPE())
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
+ else
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
// With the instructions enabled under FPCVT, we can do everything.
@@ -1195,10 +1223,34 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
return Align;
}
+unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ EVT VT) const {
+ if (Subtarget.hasSPE() && VT == MVT::f64)
+ return 2;
+ return PPCTargetLowering::getNumRegisters(Context, VT);
+}
+
+MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ EVT VT) const {
+ if (Subtarget.hasSPE() && VT == MVT::f64)
+ return MVT::i32;
+ return PPCTargetLowering::getRegisterType(Context, VT);
+}
+
+MVT PPCTargetLowering::getRegisterTypeForCallingConv(MVT VT) const {
+ if (Subtarget.hasSPE() && VT == MVT::f64)
+ return MVT::i32;
+ return PPCTargetLowering::getRegisterType(VT);
+}
+
bool PPCTargetLowering::useSoftFloat() const {
return Subtarget.useSoftFloat();
}
+bool PPCTargetLowering::hasSPE() const {
+ return Subtarget.hasSPE();
+}
+
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
@@ -3362,7 +3414,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve space for the linkage area on the stack.
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
CCInfo.AllocateStack(LinkageSize, PtrByteSize);
- if (useSoftFloat())
+ if (useSoftFloat() || hasSPE())
CCInfo.PreAnalyzeFormalArguments(Ins);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -3386,12 +3438,16 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
case MVT::f32:
if (Subtarget.hasP8Vector())
RC = &PPC::VSSRCRegClass;
+ else if (Subtarget.hasSPE())
+ RC = &PPC::SPE4RCRegClass;
else
RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
if (Subtarget.hasVSX())
RC = &PPC::VSFRCRegClass;
+ else if (Subtarget.hasSPE())
+ RC = &PPC::SPERCRegClass;
else
RC = &PPC::F8RCRegClass;
break;
@@ -3480,7 +3536,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
};
unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
- if (useSoftFloat())
+ if (useSoftFloat() || hasSPE())
NumFPArgRegs = 0;
FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
@@ -10230,6 +10286,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
MI.getOpcode() == PPC::SELECT_CC_VSRC ||
+ MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
+ MI.getOpcode() == PPC::SELECT_CC_SPE ||
MI.getOpcode() == PPC::SELECT_I4 ||
MI.getOpcode() == PPC::SELECT_I8 ||
MI.getOpcode() == PPC::SELECT_F4 ||
@@ -10238,6 +10296,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::SELECT_QFRC ||
MI.getOpcode() == PPC::SELECT_QSRC ||
MI.getOpcode() == PPC::SELECT_QBRC ||
+ MI.getOpcode() == PPC::SELECT_SPE ||
+ MI.getOpcode() == PPC::SELECT_SPE4 ||
MI.getOpcode() == PPC::SELECT_VRRC ||
MI.getOpcode() == PPC::SELECT_VSFRC ||
MI.getOpcode() == PPC::SELECT_VSSRC ||
@@ -10271,6 +10331,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
MI.getOpcode() == PPC::SELECT_F16 ||
+ MI.getOpcode() == PPC::SELECT_SPE4 ||
+ MI.getOpcode() == PPC::SELECT_SPE ||
MI.getOpcode() == PPC::SELECT_QFRC ||
MI.getOpcode() == PPC::SELECT_QSRC ||
MI.getOpcode() == PPC::SELECT_QBRC ||
@@ -13264,14 +13326,21 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// really care overly much here so just give them all the same reg classes.
case 'd':
case 'f':
- if (VT == MVT::f32 || VT == MVT::i32)
- return std::make_pair(0U, &PPC::F4RCRegClass);
- if (VT == MVT::f64 || VT == MVT::i64)
- return std::make_pair(0U, &PPC::F8RCRegClass);
- if (VT == MVT::v4f64 && Subtarget.hasQPX())
- return std::make_pair(0U, &PPC::QFRCRegClass);
- if (VT == MVT::v4f32 && Subtarget.hasQPX())
- return std::make_pair(0U, &PPC::QSRCRegClass);
+ if (Subtarget.hasSPE()) {
+ if (VT == MVT::f32 || VT == MVT::i32)
+ return std::make_pair(0U, &PPC::SPE4RCRegClass);
+ if (VT == MVT::f64 || VT == MVT::i64)
+ return std::make_pair(0U, &PPC::SPERCRegClass);
+ } else {
+ if (VT == MVT::f32 || VT == MVT::i32)
+ return std::make_pair(0U, &PPC::F4RCRegClass);
+ if (VT == MVT::f64 || VT == MVT::i64)
+ return std::make_pair(0U, &PPC::F8RCRegClass);
+ if (VT == MVT::v4f64 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QFRCRegClass);
+ if (VT == MVT::v4f32 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QSRCRegClass);
+ }
break;
case 'v':
if (VT == MVT::v4f64 && Subtarget.hasQPX())
OpenPOWER on IntegriCloud