Introduce codegen for the Signal Processing Engine

Summary: The Signal Processing Engine (SPE) is found on NXP/Freescale e500v1, e500v2, and several e200 cores. This adds support targeting the e500v2, as this is more common than the e500v1, and is in SoCs still on the market. This patch is very intrusive because the SPE is binary incompatible with the traditional FPU. After discussing with others, the cleanest solution was to make both SPE and FPU features on top of a base PowerPC subset, so all FPU instructions are now wrapped with HasFPU predicates. Supported by this are: * Code generation following the SPE ABI at the LLVM IR level (calling conventions) * Single- and Double-precision math at the level supported by the APU. Still to do: * Vector operations * SPE intrinsics As this changes the Callee-saved register list order, one test, which tests the precise generated code, was updated to account for the new register order. Reviewed by: nemanjai Differential Revision: https://reviews.llvm.org/D44830 llvm-svn: 337347
author: Justin Hibbits <jrh29@alumni.cwru.edu> 2018-07-18 04:25:10 +0000
committer: Justin Hibbits <jrh29@alumni.cwru.edu> 2018-07-18 04:25:10 +0000
commit: d52990c71b6f83c9beeba4efc9103412e0416ba9 (patch)
tree: febc0bdc8253b60c767bc182970d894684843e06 /llvm/lib/Target/PowerPC/PPCFastISel.cpp
parent: 4fa4fa6a73a7829494df8272ef49825fb2066d1a (diff)
download: bcm5719-llvm-d52990c71b6f83c9beeba4efc9103412e0416ba9.tar.gz
bcm5719-llvm-d52990c71b6f83c9beeba4efc9103412e0416ba9.zip
1 files changed, 119 insertions, 29 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index cd19568306e..19d94137400 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -153,7 +153,8 @@ class PPCFastISel final : public FastISel {
       return RC->getID() == PPC::VSSRCRegClassID;
     }
     bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
-                    bool isZExt, unsigned DestReg);
+                    bool isZExt, unsigned DestReg,
+                    const PPC::Predicate Pred);
     bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
                      const TargetRegisterClass *RC, bool IsZExt = true,
                      unsigned FP64LoadOpc = PPC::LFD);
@@ -466,6 +467,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
                               bool IsZExt, unsigned FP64LoadOpc) {
   unsigned Opc;
   bool UseOffset = true;
+  bool HasSPE = PPCSubTarget->hasSPE();
 
   // If ResultReg is given, it determines the register class of the load.
   // Otherwise, RC is the register class to use.  If the result of the
@@ -477,8 +479,8 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
   const TargetRegisterClass *UseRC =
     (ResultReg ? MRI.getRegClass(ResultReg) :
      (RC ? RC :
-      (VT == MVT::f64 ? &PPC::F8RCRegClass :
-       (VT == MVT::f32 ? &PPC::F4RCRegClass :
+      (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
+       (VT == MVT::f32 ? (HasSPE ? &PPC::SPE4RCRegClass : &PPC::F4RCRegClass) :
         (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
          &PPC::GPRC_and_GPRC_NOR0RegClass)))));
 
@@ -507,7 +509,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
       UseOffset = ((Addr.Offset & 3) == 0);
       break;
     case MVT::f32:
-      Opc = PPC::LFS;
+      Opc = PPCSubTarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
       break;
     case MVT::f64:
       Opc = FP64LoadOpc;
@@ -578,6 +580,8 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
       case PPC::LD:     Opc = PPC::LDX;     break;
       case PPC::LFS:    Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
       case PPC::LFD:    Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
+      case PPC::EVLDD:  Opc = PPC::EVLDDX;  break;
+      case PPC::SPELWZ: Opc = PPC::SPELWZX;    break;
     }
 
     auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
@@ -620,7 +624,8 @@ bool PPCFastISel::SelectLoad(const Instruction *I) {
     AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
 
   unsigned ResultReg = 0;
-  if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
+  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
+      PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
     return false;
   updateValueMap(I, ResultReg);
   return true;
@@ -653,10 +658,10 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
       UseOffset = ((Addr.Offset & 3) == 0);
       break;
     case MVT::f32:
-      Opc = PPC::STFS;
+      Opc = PPCSubTarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
       break;
     case MVT::f64:
-      Opc = PPC::STFD;
+      Opc = PPCSubTarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
       break;
   }
 
@@ -721,6 +726,8 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
       case PPC::STD:  Opc = PPC::STDX;  break;
       case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
       case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
+      case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
+      case PPC::SPESTW: Opc = PPC::SPESTWX; break;
     }
 
     auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
@@ -794,11 +801,12 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
       unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
 
       if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
-                      CondReg))
+                      CondReg, PPCPred))
         return false;
 
       BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
-        .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
+        .addImm(PPCSubTarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
+        .addReg(CondReg).addMBB(TBB);
       finishCondBranch(BI->getParent(), TBB, FBB);
       return true;
     }
@@ -822,7 +830,8 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
 // Attempt to emit a compare of the two source values.  Signed and unsigned
 // comparisons are supported.  Return false if we can't handle it.
 bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
-                             bool IsZExt, unsigned DestReg) {
+                             bool IsZExt, unsigned DestReg,
+                             const PPC::Predicate Pred) {
   Type *Ty = SrcValue1->getType();
   EVT SrcEVT = TLI.getValueType(DL, Ty, true);
   if (!SrcEVT.isSimple())
@@ -838,6 +847,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
   // similar to ARM in this regard.
   long Imm = 0;
   bool UseImm = false;
+  const bool HasSPE = PPCSubTarget->hasSPE();
 
   // Only 16-bit integer constants can be represented in compares for
   // PowerPC.  Others will be materialized into a register.
@@ -856,10 +866,38 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
   switch (SrcVT.SimpleTy) {
     default: return false;
     case MVT::f32:
-      CmpOpc = PPC::FCMPUS;
+      if (HasSPE) {
+        switch (Pred) {
+          default: return false;
+          case PPC::PRED_EQ:
+            CmpOpc = PPC::EFSCMPEQ;
+            break;
+          case PPC::PRED_LT:
+            CmpOpc = PPC::EFSCMPLT;
+            break;
+          case PPC::PRED_GT:
+            CmpOpc = PPC::EFSCMPGT;
+            break;
+        }
+      } else
+        CmpOpc = PPC::FCMPUS;
       break;
     case MVT::f64:
-      CmpOpc = PPC::FCMPUD;
+      if (HasSPE) {
+        switch (Pred) {
+          default: return false;
+          case PPC::PRED_EQ:
+            CmpOpc = PPC::EFDCMPEQ;
+            break;
+          case PPC::PRED_LT:
+            CmpOpc = PPC::EFDCMPLT;
+            break;
+          case PPC::PRED_GT:
+            CmpOpc = PPC::EFDCMPGT;
+            break;
+        }
+      } else
+        CmpOpc = PPC::FCMPUD;
       break;
     case MVT::i1:
     case MVT::i8:
@@ -947,9 +985,19 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
     return false;
 
   // Round the result to single precision.
-  unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
-    .addReg(SrcReg);
+  unsigned DestReg;
+
+  if (PPCSubTarget->hasSPE()) {
+    DestReg = createResultReg(&PPC::SPE4RCRegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+      TII.get(PPC::EFSCFD), DestReg)
+      .addReg(SrcReg);
+  } else {
+    DestReg = createResultReg(&PPC::F4RCRegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+      TII.get(PPC::FRSP), DestReg)
+      .addReg(SrcReg);
+  }
 
   updateValueMap(I, DestReg);
   return true;
@@ -1031,6 +1079,22 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
   if (SrcReg == 0)
     return false;
 
+  // Shortcut for SPE.  Doesn't need to store/load, since it's all in the GPRs
+  if (PPCSubTarget->hasSPE()) {
+    unsigned Opc;
+    if (DstVT == MVT::f32)
+      Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
+    else
+      Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;
+
+    unsigned DestReg = createResultReg(&PPC::SPERCRegClass);
+    // Generate the convert.
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+      .addReg(SrcReg);
+    updateValueMap(I, DestReg);
+    return true;
+  }
+
   // We can only lower an unsigned convert if we have the newer
   // floating-point conversion operations.
   if (!IsSigned && !PPCSubTarget->hasFPCVT())
@@ -1125,8 +1189,9 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
   if (DstVT != MVT::i32 && DstVT != MVT::i64)
     return false;
 
-  // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
-  if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT())
+  // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
+  if (DstVT == MVT::i64 && !IsSigned &&
+      !PPCSubTarget->hasFPCVT() && !PPCSubTarget->hasSPE())
     return false;
 
   Value *Src = I->getOperand(0);
@@ -1154,23 +1219,36 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
 
   // Determine the opcode for the conversion, which takes place
   // entirely within FPRs.
-  unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
+  unsigned DestReg;
   unsigned Opc;
 
-  if (DstVT == MVT::i32)
-    if (IsSigned)
-      Opc = PPC::FCTIWZ;
+  if (PPCSubTarget->hasSPE()) {
+    if (DstVT == MVT::i32) {
+      DestReg = createResultReg(&PPC::GPRCRegClass);
+      if (IsSigned)
+        Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
+      else
+        Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
+    }
+  } else {
+    DestReg = createResultReg(&PPC::F8RCRegClass);
+    if (DstVT == MVT::i32)
+      if (IsSigned)
+        Opc = PPC::FCTIWZ;
+      else
+        Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
     else
-      Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
-  else
-    Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
+      Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
+  }
 
   // Generate the convert.
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
     .addReg(SrcReg);
 
   // Now move the integer value from a float register to an integer register.
-  unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
+  unsigned IntReg = PPCSubTarget->hasSPE() ? DestReg :
+    PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
+
   if (IntReg == 0)
     return false;
 
@@ -1918,8 +1996,13 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
   assert(Align > 0 && "Unexpectedly missing alignment information!");
   unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
-  const TargetRegisterClass *RC =
-    (VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass;
+  const bool HasSPE = PPCSubTarget->hasSPE();
+  const TargetRegisterClass *RC;
+  if (HasSPE)
+    RC = ((VT == MVT::f32) ? &PPC::SPE4RCRegClass : &PPC::SPERCRegClass);
+  else
+    RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
+
   unsigned DestReg = createResultReg(RC);
   CodeModel::Model CModel = TM.getCodeModel();
 
@@ -1927,7 +2010,13 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
       MachinePointerInfo::getConstantPool(*FuncInfo.MF),
       MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align);
 
-  unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
+  unsigned Opc;
+
+  if (HasSPE)
+    Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
+  else
+    Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);
+
   unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
 
   PPCFuncInfo->setUsesTOCBasePtr();
@@ -2263,7 +2352,8 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
 
   unsigned ResultReg = MI->getOperand(0).getReg();
 
-  if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt))
+  if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
+        PPCSubTarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
     return false;
 
   MI->eraseFromParent();
author	Justin Hibbits <jrh29@alumni.cwru.edu>	2018-07-18 04:25:10 +0000
committer	Justin Hibbits <jrh29@alumni.cwru.edu>	2018-07-18 04:25:10 +0000
commit	d52990c71b6f83c9beeba4efc9103412e0416ba9 (patch)
tree	febc0bdc8253b60c767bc182970d894684843e06 /llvm/lib/Target/PowerPC/PPCFastISel.cpp
parent	4fa4fa6a73a7829494df8272ef49825fb2066d1a (diff)
download	bcm5719-llvm-d52990c71b6f83c9beeba4efc9103412e0416ba9.tar.gz bcm5719-llvm-d52990c71b6f83c9beeba4efc9103412e0416ba9.zip