[PowerPC] Use vector types for memcpy and friends (sometimes)

When using Altivec, we can use vector loads and stores for aligned memcpy and friends. Starting with the P7 and VSX, we have reasonable unaligned vector stores. Starting with the P8, we have fast unaligned loads too. For QPX, we use vector loads and stores, but only for aligned memory accesses. llvm-svn: 230788
author: Hal Finkel <hfinkel@anl.gov> 2015-02-27 19:58:28 +0000
committer: Hal Finkel <hfinkel@anl.gov> 2015-02-27 19:58:28 +0000
commit: 5c3cacf5c0dfe0c8ddcf3784da436f9ec8273531 (patch)
tree: 0bab33109d709077c70dc29f71d2bbb3a821736b /llvm/lib/Target
parent: 686b1fe65ae90f531d5010b3c6b47b479d4b7ccd (diff)
download: bcm5719-llvm-5c3cacf5c0dfe0c8ddcf3784da436f9ec8273531.tar.gz
bcm5719-llvm-5c3cacf5c0dfe0c8ddcf3784da436f9ec8273531.zip
1 files changed, 25 insertions, 2 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 147e94b560a..7de2ae10b8b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -892,6 +892,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     MaxStoresPerMemcpyOptSize = 8;
     MaxStoresPerMemmove = 32;
     MaxStoresPerMemmoveOptSize = 8;
+  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
+    // The A2 also benefits from (very) aggressive inlining of memcpy and
+    // friends. The overhead of the function call, even when warm, can be
+    // over one hundred cycles.
+    MaxStoresPerMemset = 128;
+    MaxStoresPerMemcpy = 128;
+    MaxStoresPerMemmove = 128;
   }
 }
 
@@ -10914,11 +10921,27 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                            bool IsMemset, bool ZeroMemset,
                                            bool MemcpyStrSrc,
                                            MachineFunction &MF) const {
+  const Function *F = MF.getFunction();
+  // When expanding a memset, require at least two QPX instructions to cover
+  // the cost of loading the value to be stored from the constant pool.
+  if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
+     (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+      !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+    return MVT::v4f64;
+  }
+
+  // We should use Altivec/VSX loads and stores when available. For unaligned
+  // addresses, unaligned VSX loads are only fast starting with the P8.
+  if (Subtarget.hasAltivec() && Size >= 16 &&
+      (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
+       ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+    return MVT::v4i32;
+
   if (Subtarget.isPPC64()) {
     return MVT::i64;
-  } else {
-    return MVT::i32;
   }
+
+  return MVT::i32;
 }
 
 /// \brief Returns true if it is beneficial to convert a load of a constant
author	Hal Finkel <hfinkel@anl.gov>	2015-02-27 19:58:28 +0000
committer	Hal Finkel <hfinkel@anl.gov>	2015-02-27 19:58:28 +0000
commit	5c3cacf5c0dfe0c8ddcf3784da436f9ec8273531 (patch)
tree	0bab33109d709077c70dc29f71d2bbb3a821736b /llvm/lib/Target
parent	686b1fe65ae90f531d5010b3c6b47b479d4b7ccd (diff)
download	bcm5719-llvm-5c3cacf5c0dfe0c8ddcf3784da436f9ec8273531.tar.gz bcm5719-llvm-5c3cacf5c0dfe0c8ddcf3784da436f9ec8273531.zip