author | Sanjay Patel <spatel@rotateright.com> | 2015-08-14 17:53:40 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2015-08-14 17:53:40 +0000 |
commit | ed502905f7c285959c5555bc01f8af762dadb8d6 (patch) | |
tree | 3fd3da3e84329fb1b344b69c81712c44ff06a0b6 /llvm/lib | |
parent | 06f0678010428b0c69612fdd32a59262dfe14d59 (diff) | |
download | bcm5719-llvm-ed502905f7c285959c5555bc01f8af762dadb8d6.tar.gz, bcm5719-llvm-ed502905f7c285959c5555bc01f8af762dadb8d6.zip | |
[x86] fix allowsMisalignedMemoryAccesses() implementation
This patch fixes the x86 implementation of allowsMisalignedMemoryAccesses() so that it
correctly sets the 'Fast' output parameter for 32-byte accesses. To exercise that, an
existing load-merging optimization is changed to use the TLI hook. This exposes a
shortcoming in the current logic and accounts for the regression test update. Changing
the other direct users of the isUnalignedMem32Slow() x86 CPU attribute is left for a
follow-on patch.
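As a rough illustration of the new 'Fast' computation (a standalone sketch, not LLVM code; FakeSubtarget and allowsMisalignedAccess are made-up names used only here): a 256-bit access reports fast only when the subtarget does not mark unaligned 32-byte accesses as slow, while every other size keeps the old behavior.

```cpp
// Hypothetical standalone model of the patched decision; FakeSubtarget and
// allowsMisalignedAccess are illustrative names, not LLVM API.
#include <cstdio>

struct FakeSubtarget {
  bool UnalignedMemAccessFast;  // models isUnalignedMemAccessFast()
  bool UnalignedMem32Slow;      // models isUnalignedMem32Slow()
};

// Mirrors the new logic: 256-bit (32-byte) accesses consult the 32-byte
// attribute; everything else falls back to the generic attribute.
static bool allowsMisalignedAccess(const FakeSubtarget &ST, unsigned SizeInBits,
                                   bool *Fast) {
  if (Fast) {
    if (SizeInBits == 256)
      *Fast = !ST.UnalignedMem32Slow;
    else
      *Fast = ST.UnalignedMemAccessFast;
  }
  return true;  // the access is always legal; only its speed varies
}

int main() {
  // Roughly models a Sandy Bridge-like CPU: unaligned 16-byte accesses are
  // fast, but unaligned 32-byte accesses are slow.
  FakeSubtarget SandyBridgeLike{true, true};
  bool Fast;
  allowsMisalignedAccess(SandyBridgeLike, 128, &Fast);
  std::printf("128-bit unaligned fast: %d\n", Fast);  // prints 1
  allowsMisalignedAccess(SandyBridgeLike, 256, &Fast);
  std::printf("256-bit unaligned fast: %d\n", Fast);  // prints 0
  return 0;
}
```

On such a configuration this reports 128-bit unaligned accesses as fast and 256-bit ones as slow, which is the distinction the old code lost.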
Without the fix in allowsMisalignedMemoryAccesses(), we would enter an infinite loop when
targeting SandyBridge, because LowerINSERT_SUBVECTOR() merges two 16-byte loads into a
32-byte load while PerformLOADCombine() splits it back into two 16-byte loads.
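A toy model of that ping-pong (purely illustrative; mergeLoads and splitLoads below are stand-ins for LowerINSERT_SUBVECTOR() and PerformLOADCombine(), not the real SelectionDAG code): when the merge step ignores the 32-byte-slow attribute that the split step honors, the two rewrites keep undoing each other and never reach a fixed point.

```cpp
// Toy model of the mutual undo, with made-up types; the real issue lives in
// the SelectionDAG combine loop, not in code like this.
#include <cstdio>

enum class LoadShape { TwoHalves, OneWide };  // two 16-byte loads vs one 32-byte load

// Stand-in for LowerINSERT_SUBVECTOR(): merges two 16-byte loads into one
// 32-byte load. Before the fix it did not ask whether that is fast.
static bool mergeLoads(LoadShape &S) {
  if (S == LoadShape::TwoHalves) { S = LoadShape::OneWide; return true; }
  return false;
}

// Stand-in for PerformLOADCombine(): splits a 32-byte load back into two
// 16-byte loads when unaligned 32-byte accesses are slow on the target.
static bool splitLoads(LoadShape &S, bool UnalignedMem32Slow) {
  if (S == LoadShape::OneWide && UnalignedMem32Slow) {
    S = LoadShape::TwoHalves;
    return true;
  }
  return false;
}

int main() {
  LoadShape S = LoadShape::TwoHalves;
  const bool UnalignedMem32Slow = true;  // e.g. a Sandy Bridge-like target
  // With inconsistent checks the two rewrites never reach a fixed point;
  // cap the iterations here so the demo terminates.
  int Iter = 0;
  while (Iter < 8 && (mergeLoads(S) || splitLoads(S, UnalignedMem32Slow)))
    ++Iter;
  std::printf("still rewriting after %d steps\n", Iter);
  return 0;
}
```

Once the merge side consults the same hook, as the patch does via allowsMemoryAccess(), both transforms agree and the cycle cannot start.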
Differential Revision: http://reviews.llvm.org/D10662
llvm-svn: 245075
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 37 |
1 file changed, 27 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9a790010f8c..68a15e021cd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1915,8 +1915,14 @@ X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                    unsigned,
                                                    unsigned,
                                                    bool *Fast) const {
-  if (Fast)
-    *Fast = Subtarget->isUnalignedMemAccessFast();
+  if (Fast) {
+    // FIXME: We should be checking 128-bit accesses separately from smaller
+    // accesses.
+    if (VT.getSizeInBits() == 256)
+      *Fast = !Subtarget->isUnalignedMem32Slow();
+    else
+      *Fast = Subtarget->isUnalignedMemAccessFast();
+  }
   return true;
 }
 
@@ -11259,14 +11265,25 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
   // --> load32 addr
   if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
       Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
-      OpVT.is256BitVector() && SubVecVT.is128BitVector() &&
-      !Subtarget->isUnalignedMem32Slow()) {
-    SDValue SubVec2 = Vec.getOperand(1);
-    if (auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2))) {
-      if (Idx2->getZExtValue() == 0) {
-        SDValue Ops[] = { SubVec2, SubVec };
-        if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
-          return Ld;
+      OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
+    auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
+    if (Idx2 && Idx2->getZExtValue() == 0) {
+      SDValue SubVec2 = Vec.getOperand(1);
+      // If needed, look through a bitcast to get to the load.
+      if (SubVec2.getNode() && SubVec2.getOpcode() == ISD::BITCAST)
+        SubVec2 = SubVec2.getOperand(0);
+
+      if (auto *FirstLd = dyn_cast<LoadSDNode>(SubVec2)) {
+        bool Fast;
+        unsigned Alignment = FirstLd->getAlignment();
+        unsigned AS = FirstLd->getAddressSpace();
+        const X86TargetLowering *TLI = Subtarget->getTargetLowering();
+        if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+                                    OpVT, AS, Alignment, &Fast) && Fast) {
+          SDValue Ops[] = { SubVec2, SubVec };
+          if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+            return Ld;
+        }
       }
     }
   }