merge vector stores into wider vector stores and fix AArch64 misaligned access TLI hook (PR21711)

This is a redo of D7208 ( r227242 - http://llvm.org/viewvc/llvm-project?view=revision&revision=227242 ). The patch was reverted because an AArch64 target could infinite loop after the change in DAGCombiner to merge vector stores. That happened because AArch64's allowsMisalignedMemoryAccesses() wasn't telling the truth. It reported all unaligned memory accesses as fast, but then split some 128-bit unaligned accesses up in performSTORECombine() because they are slow. This patch attempts to fix the problem in AArch's allowsMisalignedMemoryAccesses() while preserving existing (perhaps questionable) lowering behavior. The x86 test shows that store merging is working as intended for a target with fast 32-byte unaligned stores. Differential Revision: http://reviews.llvm.org/D12635 llvm-svn: 248622
author: Sanjay Patel <spatel@rotateright.com> 2015-09-25 21:49:48 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2015-09-25 21:49:48 +0000
commit: bbbf9a1a34af09b6e271f2288c32429e1a53f3e6 (patch)
tree: 18489b94ed026e4c10217aa4663e46fb1db27200 /llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
parent: d0626804fc04f89c0047db54ffd8a843da1f5b4a (diff)
download: bcm5719-llvm-bbbf9a1a34af09b6e271f2288c32429e1a53f3e6.tar.gz
bcm5719-llvm-bbbf9a1a34af09b6e271f2288c32429e1a53f3e6.zip
1 files changed, 24 insertions, 11 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ad60b8244f1..01ca2884d03 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11072,8 +11072,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
     return false;
 
-  // Don't merge vectors into wider inputs.
-  if (MemVT.isVector() || !MemVT.isSimple())
+  if (!MemVT.isSimple())
     return false;
 
   // Perform an early exit check. Do not bother looking at stored values that
@@ -11082,9 +11081,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
                        isa<ConstantFPSDNode>(StoredVal);
-  bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+  bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+                          StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
 
-  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
+  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
+    return false;
+
+  // Don't merge vectors into wider vectors if the source data comes from loads.
+  // TODO: This restriction can be lifted by using logic similar to the
+  // ExtractVecSrc case.
+  if (MemVT.isVector() && IsLoadSrc)
     return false;
 
   // Only look at ends of store sequences.
@@ -11217,29 +11223,36 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
 
   // When extracting multiple vector elements, try to store them
   // in one vector store rather than a sequence of scalar stores.
-  if (IsExtractVecEltSrc) {
-    unsigned NumElem = 0;
+  if (IsExtractVecSrc) {
+    unsigned NumStoresToMerge = 0;
+    bool IsVec = MemVT.isVector();
     for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
-      SDValue StoredVal = St->getValue();
+      unsigned StoreValOpcode = St->getValue().getOpcode();
       // This restriction could be loosened.
       // Bail out if any stored values are not elements extracted from a vector.
       // It should be possible to handle mixed sources, but load sources need
       // more careful handling (see the block of code below that handles
       // consecutive loads).
-      if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
+          StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
         return false;
 
       // Find a legal type for the vector store.
-      EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
+      unsigned Elts = i + 1;
+      if (IsVec) {
+        // When merging vector stores, get the total number of elements.
+        Elts *= MemVT.getVectorNumElements();
+      }
+      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
       bool IsFast;
       if (TLI.isTypeLegal(Ty) &&
           TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                  FirstStoreAlign, &IsFast) && IsFast)
-        NumElem = i + 1;
+        NumStoresToMerge = i + 1;
     }
 
-    return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+    return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
                                            false, true);
   }
author	Sanjay Patel <spatel@rotateright.com>	2015-09-25 21:49:48 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2015-09-25 21:49:48 +0000
commit	bbbf9a1a34af09b6e271f2288c32429e1a53f3e6 (patch)
tree	18489b94ed026e4c10217aa4663e46fb1db27200 /llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
parent	d0626804fc04f89c0047db54ffd8a843da1f5b4a (diff)
download	bcm5719-llvm-bbbf9a1a34af09b6e271f2288c32429e1a53f3e6.tar.gz bcm5719-llvm-bbbf9a1a34af09b6e271f2288c32429e1a53f3e6.zip