[Power9] Add exploitation of non-permuting memory ops

This patch corresponds to review:
https://reviews.llvm.org/D19825

The new lxvx/stxvx instructions do not require swaps to line the elements up correctly. In order to select them over the lxvd2x/lxvw4x instructions, which require swaps, the patterns for the old instructions have a predicate that ensures they won't be selected on Power9 and newer CPUs.

llvm-svn: 282143
author: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2016-09-22 09:52:19 +0000
committer: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2016-09-22 09:52:19 +0000
commit: 6e7879c5e6e26c2c6d00618f12af20012eecff41 (patch)
tree: e75271eca7c8a4f68a884aad96748b64ab021bb5 /llvm/lib/Target/PowerPC/PPCISelLowering.cpp
parent: 2ce2ab3a4dc1df6ec87a42aa2baa6741c2456e0c (diff)
download: bcm5719-llvm-6e7879c5e6e26c2c6d00618f12af20012eecff41.tar.gz
bcm5719-llvm-6e7879c5e6e26c2c6d00618f12af20012eecff41.zip
1 files changed, 8 insertions, 4 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cd75474a76a..1d9181b95d1 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10734,10 +10734,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     }
 
     // For little endian, VSX stores require generating xxswapd/lxvd2x.
+    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
     EVT VT = N->getOperand(1).getValueType();
     if (VT.isSimple()) {
       MVT StoreVT = VT.getSimpleVT();
-      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
+      if (Subtarget.needsSwapsForVSXMemOps() &&
           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
         return expandVSXStoreForLE(N, DCI);
@@ -10749,9 +10750,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     EVT VT = LD->getValueType(0);
 
     // For little endian, VSX loads require generating lxvd2x/xxswapd.
+    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
     if (VT.isSimple()) {
       MVT LoadVT = VT.getSimpleVT();
-      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
+      if (Subtarget.needsSwapsForVSXMemOps() &&
           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
         return expandVSXLoadForLE(N, DCI);
@@ -11066,7 +11068,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     break;
   case ISD::INTRINSIC_W_CHAIN: {
     // For little endian, VSX loads require generating lxvd2x/xxswapd.
-    if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
+    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
+    if (Subtarget.needsSwapsForVSXMemOps()) {
       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
       default:
         break;
@@ -11079,7 +11082,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
   }
   case ISD::INTRINSIC_VOID: {
     // For little endian, VSX stores require generating xxswapd/stxvd2x.
-    if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
+    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
+    if (Subtarget.needsSwapsForVSXMemOps()) {
       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
       default:
         break;
author	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>	2016-09-22 09:52:19 +0000
committer	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>	2016-09-22 09:52:19 +0000
commit	6e7879c5e6e26c2c6d00618f12af20012eecff41 (patch)
tree	e75271eca7c8a4f68a884aad96748b64ab021bb5 /llvm/lib/Target/PowerPC/PPCISelLowering.cpp
parent	2ce2ab3a4dc1df6ec87a42aa2baa6741c2456e0c (diff)
download	bcm5719-llvm-6e7879c5e6e26c2c6d00618f12af20012eecff41.tar.gz bcm5719-llvm-6e7879c5e6e26c2c6d00618f12af20012eecff41.zip