author    Tony Jiang <jtony@ca.ibm.com>    2017-11-20 14:38:30 +0000
committer Tony Jiang <jtony@ca.ibm.com>    2017-11-20 14:38:30 +0000
commit    438bf4a66b1573887823fe36c82c45a891107768 (patch)
tree      05e9a21a96890432bd744b8cd764ec78d79e56c8 /llvm/lib
parent    0c5a29b6be61151e0e6876014c3c4dfd8aa1b9d9 (diff)
[PPC] Heuristic to choose between an X-Form VSX ld/st and an X-Form FP ld/st.
The VSX versions have the advantage of targeting the full 64-register VSX file, whereas the
FP versions have the advantage of lower latency and higher throughput. The goal is therefore
to use the faster FP instructions when register pressure is low and to exploit the larger
register file when register pressure is high.
The heuristic chooses between the following seven pairs of instructions:
PPC::LXSSPX vs PPC::LFSX
PPC::LXSDX vs PPC::LFDX
PPC::STXSSPX vs PPC::STFSX
PPC::STXSDX vs PPC::STFDX
PPC::LXSIWAX vs PPC::LFIWAX
PPC::LXSIWZX vs PPC::LFIWZX
PPC::STXSIWX vs PPC::STFIWX
Differential Revision: https://reviews.llvm.org/D38486
llvm-svn: 318651
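
The choice is deferred until after register allocation: the affected loads and stores are
emitted as pseudo instructions, and expandPostRAPseudo() rewrites each pseudo into either the
VSX or the FP form depending on the register the allocator assigned. The following is a
condensed sketch of that decision, not part of the patch itself; the helper name and
simplified signature are illustrative, and the real logic lives in
PPCInstrInfo::expandVSXMemPseudo in the PPCInstrInfo.cpp hunk below.

// Illustrative sketch only (assumes the PowerPC register enums from the
// target headers); the patch performs this check inside expandVSXMemPseudo
// on the register assigned to operand 0 of the pseudo instruction.
static unsigned pickRealOpcode(unsigned AssignedReg, unsigned VSXOpcode,
                               unsigned FPOpcode) {
  // If the value landed in the classic FP registers (the lower half of the
  // VSX file), the lower-latency FP instruction can encode it, so prefer it;
  // otherwise use the VSX X-Form, which can reach all 64 VSX registers.
  // (The in-tree check is slightly broader, also matching the VSX aliases
  // of these registers.)
  if (AssignedReg >= PPC::F0 && AssignedReg <= PPC::F31)
    return FPOpcode;
  return VSXOpcode;
}

For example, an XFLOADf32 whose result was allocated to f5 is rewritten to LFSX, while one
whose result landed outside the FP range (say vs40) is rewritten to LXSSPX.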
Diffstat (limited to 'llvm/lib')
 llvm/lib/Target/PowerPC/P9InstrResources.td   |  13
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      | 107
 llvm/lib/Target/PowerPC/PPCInstrInfo.h        |   3
 llvm/lib/Target/PowerPC/PPCInstrVSX.td        |  79
 llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp |   2
 5 files changed, 158 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 510352d5a9b..dc6ed16e53c 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -711,7 +711,8 @@ def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
     LXV,
     LXVX,
     LXSD,
-    DFLOADf64
+    DFLOADf64,
+    XFLOADf64
 )>;
 
 // 4 Cycle load uses a single slice.
@@ -751,7 +752,10 @@ def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
     LXSSPX,
     LXSIWAX,
     LXSSP,
-    DFLOADf32
+    DFLOADf32,
+    XFLOADf32,
+    LIWAX,
+    LIWZX
 )>;
 
 // Cracked Load that requires the PM resource.
@@ -781,7 +785,10 @@ def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
     STXSSPX,
     STXSIWX,
     DFSTOREf32,
-    DFSTOREf64
+    DFSTOREf64,
+    XFSTOREf32,
+    XFSTOREf64,
+    STIWX
 )>;
 
 // Store operation that requires the whole superslice.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 70920294aea..99a52902a52 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1977,29 +1977,13 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
   return makeArrayRef(TargetFlags);
 }
 
-bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
-  auto &MBB = *MI.getParent();
-  auto DL = MI.getDebugLoc();
-  switch (MI.getOpcode()) {
-  case TargetOpcode::LOAD_STACK_GUARD: {
-    assert(Subtarget.isTargetLinux() &&
-           "Only Linux target is expected to contain LOAD_STACK_GUARD");
-    const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
-    const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
-    MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
-    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
-        .addImm(Offset)
-        .addReg(Reg);
-    return true;
-  }
-  case PPC::DFLOADf32:
-  case PPC::DFLOADf64:
-  case PPC::DFSTOREf32:
-  case PPC::DFSTOREf64: {
-    assert(Subtarget.hasP9Vector() &&
-           "Invalid D-Form Pseudo-ops on non-P9 target.");
-    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() &&
-           "D-form op must have register and immediate operands");
+// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
+// The VSX versions have the advantage of a full 64-register target whereas
+// the FP ones have the advantage of lower latency and higher throughput. So
+// what we are after is using the faster instructions in low register pressure
+// situations and using the larger register file in high register pressure
+// situations.
+bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const {
   unsigned UpperOpcode, LowerOpcode;
   switch (MI.getOpcode()) {
   case PPC::DFLOADf32:
@@ -2018,7 +2002,38 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     UpperOpcode = PPC::STXSD;
     LowerOpcode = PPC::STFD;
     break;
+  case PPC::XFLOADf32:
+    UpperOpcode = PPC::LXSSPX;
+    LowerOpcode = PPC::LFSX;
+    break;
+  case PPC::XFLOADf64:
+    UpperOpcode = PPC::LXSDX;
+    LowerOpcode = PPC::LFDX;
+    break;
+  case PPC::XFSTOREf32:
+    UpperOpcode = PPC::STXSSPX;
+    LowerOpcode = PPC::STFSX;
+    break;
+  case PPC::XFSTOREf64:
+    UpperOpcode = PPC::STXSDX;
+    LowerOpcode = PPC::STFDX;
+    break;
+  case PPC::LIWAX:
+    UpperOpcode = PPC::LXSIWAX;
+    LowerOpcode = PPC::LFIWAX;
+    break;
+  case PPC::LIWZX:
+    UpperOpcode = PPC::LXSIWZX;
+    LowerOpcode = PPC::LFIWZX;
+    break;
+  case PPC::STIWX:
+    UpperOpcode = PPC::STXSIWX;
+    LowerOpcode = PPC::STFIWX;
+    break;
+  default:
+    llvm_unreachable("Unknown Operation!");
   }
+
   unsigned TargetReg = MI.getOperand(0).getReg();
   unsigned Opcode;
   if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
@@ -2028,6 +2043,52 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     Opcode = UpperOpcode;
   MI.setDesc(get(Opcode));
   return true;
+}
+
+bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+  auto &MBB = *MI.getParent();
+  auto DL = MI.getDebugLoc();
+
+  switch (MI.getOpcode()) {
+  case TargetOpcode::LOAD_STACK_GUARD: {
+    assert(Subtarget.isTargetLinux() &&
+           "Only Linux target is expected to contain LOAD_STACK_GUARD");
+    const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
+    const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
+    MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
+    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+        .addImm(Offset)
+        .addReg(Reg);
+    return true;
+  }
+  case PPC::DFLOADf32:
+  case PPC::DFLOADf64:
+  case PPC::DFSTOREf32:
+  case PPC::DFSTOREf64: {
+    assert(Subtarget.hasP9Vector() &&
+           "Invalid D-Form Pseudo-ops on Pre-P9 target.");
+    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() &&
+           "D-form op must have register and immediate operands");
+    return expandVSXMemPseudo(MI);
+  }
+  case PPC::XFLOADf32:
+  case PPC::XFSTOREf32:
+  case PPC::LIWAX:
+  case PPC::LIWZX:
+  case PPC::STIWX: {
+    assert(Subtarget.hasP8Vector() &&
+           "Invalid X-Form Pseudo-ops on Pre-P8 target.");
+    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
+           "X-form op must have register and register operands");
+    return expandVSXMemPseudo(MI);
+  }
+  case PPC::XFLOADf64:
+  case PPC::XFSTOREf64: {
+    assert(Subtarget.hasVSX() &&
+           "Invalid X-Form Pseudo-ops on target that has no VSX.");
+    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
+           "X-form op must have register and register operands");
+    return expandVSXMemPseudo(MI);
   }
   case PPC::SPILLTOVSR_LD: {
     unsigned TargetReg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 565392f76e4..f7a44b89233 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -282,6 +282,9 @@ public:
   ArrayRef<std::pair<unsigned, const char *>>
   getSerializableBitmaskMachineOperandTargetFlags() const override;
 
+  // Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
+  bool expandVSXMemPseudo(MachineInstr &MI) const;
+
   // Lower pseudo instructions after register allocation.
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 3261bc9bc53..9ddb12ea8c1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -131,6 +131,12 @@ let Uses = [RM] in {
                         "lxsdx $XT, $src", IIC_LdStLFD,
                         [(set f64:$XT, (load xoaddr:$src))]>;
 
+    // Pseudo instruction XFLOADf64 will be expanded to LXSDX or LFDX later
+    let isPseudo = 1, CodeSize = 3 in
+      def XFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrr:$src),
+                      "#XFLOADf64",
+                      [(set f64:$XT, (load xoaddr:$src))]>;
+
     let Predicates = [HasVSX, HasOnlySwappingMemOps] in
     def LXVD2X : XX1Form<31, 844,
                          (outs vsrc:$XT), (ins memrr:$src),
@@ -156,6 +162,12 @@
                         "stxsdx $XT, $dst", IIC_LdStSTFD,
                         [(store f64:$XT, xoaddr:$dst)]>;
 
+    // Pseudo instruction XFSTOREf64 will be expanded to STXSDX or STFDX later
+    let isPseudo = 1, CodeSize = 3 in
+      def XFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrr:$dst),
+                      "#XFSTOREf64",
+                      [(store f64:$XT, xoaddr:$dst)]>;
+
     let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
     // The behaviour of this instruction is endianness-specific so we provide no
     // pattern to match it without considering endianness.
@@ -1215,32 +1227,59 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
   let mayLoad = 1, mayStore = 0 in {
     let CodeSize = 3 in
     def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
-                        "lxsspx $XT, $src", IIC_LdStLFD,
-                        [(set f32:$XT, (load xoaddr:$src))]>;
+                        "lxsspx $XT, $src", IIC_LdStLFD, []>;
     def LXSIWAX : XX1Form<31, 76, (outs vsfrc:$XT), (ins memrr:$src),
-                        "lxsiwax $XT, $src", IIC_LdStLFD,
-                        [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
+                        "lxsiwax $XT, $src", IIC_LdStLFD, []>;
     def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
-                        "lxsiwzx $XT, $src", IIC_LdStLFD,
-                        [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
+                        "lxsiwzx $XT, $src", IIC_LdStLFD, []>;
+
+    // Please note let isPseudo = 1 is not part of class Pseudo<>. Missing it
+    // would cause these Pseudos are not expanded in expandPostRAPseudos()
+    let isPseudo = 1 in {
+      // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
+      let CodeSize = 3 in
+      def XFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrr:$src),
+                      "#XFLOADf32",
+                      [(set f32:$XT, (load xoaddr:$src))]>;
+      // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
+      def LIWAX : Pseudo<(outs vsfrc:$XT), (ins memrr:$src),
+                  "#LIWAX",
+                  [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
+      // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
+      def LIWZX : Pseudo<(outs vsfrc:$XT), (ins memrr:$src),
+                  "#LIWZX",
+                  [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
+    }
   } // mayLoad
 
   // VSX scalar stores introduced in ISA 2.07
   let mayStore = 1, mayLoad = 0 in {
     let CodeSize = 3 in
     def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
-                        "stxsspx $XT, $dst", IIC_LdStSTFD,
-                        [(store f32:$XT, xoaddr:$dst)]>;
+                        "stxsspx $XT, $dst", IIC_LdStSTFD, []>;
     def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
-                        "stxsiwx $XT, $dst", IIC_LdStSTFD,
-                        [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
+                        "stxsiwx $XT, $dst", IIC_LdStSTFD, []>;
+
+    // Please note let isPseudo = 1 is not part of class Pseudo<>. Missing it
+    // would cause these Pseudos are not expanded in expandPostRAPseudos()
+    let isPseudo = 1 in {
+      // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
+      let CodeSize = 3 in
+      def XFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrr:$dst),
+                       "#XFSTOREf32",
+                       [(store f32:$XT, xoaddr:$dst)]>;
+      // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
+      def STIWX : Pseudo<(outs), (ins vsfrc:$XT, memrr:$dst),
+                  "#STIWX",
+                  [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
+    }
   } // mayStore
 } // UseVSXReg = 1
 
 def : Pat<(f64 (extloadf32 xoaddr:$src)),
-          (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
+          (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
 def : Pat<(f32 (fpround (extloadf32 xoaddr:$src))),
-          (f32 (LXSSPX xoaddr:$src))>;
+          (f32 (XFLOADf32 xoaddr:$src))>;
 def : Pat<(f64 (fpextend f32:$src)),
           (COPY_TO_REGCLASS $src, VSFRC)>;
 
@@ -1414,7 +1453,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
             (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
   }
 
   def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)),
-            (v4i32 (XXSPLTWs (LXSIWAX xoaddr:$src), 1))>;
+            (v4i32 (XXSPLTWs (LIWAX xoaddr:$src), 1))>;
 } // AddedComplexity = 400
 } // HasP8Vector
@@ -3047,10 +3086,10 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
                                 (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>;
   def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
             (v4i32 (XXSPLTW (COPY_TO_REGCLASS
-                              (XSCVDPSXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>;
+                              (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
   def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
             (v4i32 (XXSPLTW (COPY_TO_REGCLASS
-                              (XSCVDPUXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>;
+                              (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
   def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
             (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
 
@@ -3068,19 +3107,19 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
   }
 
   let Predicates = [HasVSX, NoP9Vector] in {
-  // Load-and-splat with fp-to-int conversion (using X-Form VSX loads).
+  // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads).
   def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
             (v4i32 (XXSPLTW (COPY_TO_REGCLASS
-                              (XSCVDPSXWS (LXSDX xoaddr:$A)), VSRC), 1))>;
+                              (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
   def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
             (v4i32 (XXSPLTW (COPY_TO_REGCLASS
-                              (XSCVDPUXWS (LXSDX xoaddr:$A)), VSRC), 1))>;
+                              (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
   def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
             (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
-                                (LXSSPX xoaddr:$A), VSFRC)), 0))>;
+                                (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
   def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
             (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
-                                (LXSSPX xoaddr:$A), VSFRC)), 0))>;
+                                (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
   }
 
 // Big endian, available on all targets with VSX
diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 7d34efd4af3..c51368d6d2a 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -353,6 +353,8 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
       break;
     case PPC::LXSDX:
     case PPC::LXSSPX:
+    case PPC::XFLOADf64:
+    case PPC::XFLOADf32:
      // A load of a floating-point value into the high-order half of
      // a vector register is safe, provided that we introduce a swap
      // following the load, which will be done by the SUBREG_TO_REG