author     Bruno Cardoso Lopes <bruno.cardoso@gmail.com>  2015-01-23 22:44:16 +0000
committer  Bruno Cardoso Lopes <bruno.cardoso@gmail.com>  2015-01-23 22:44:16 +0000
commit     56567f9135051d63f18635132a4b45a57a19957e (patch)
tree       fd36ebfbe5028822101a063097857895cc473c82 /llvm/lib
parent     011c7425355edc56ca13d968b0fe2cea72f3ad53 (diff)
[x86] Combine x86mmx/i64 to v2i64 conversion to use scalar_to_vector
Handle the poor codegen for i64/x86mmx -> v2i64 (%mm -> %xmm) moves. Instead of
using a stack store/load pair to do the job, use scalar_to_vector directly, which
in the MMX case can use movq2dq. This was the behavior prior to the improvements
for vector legalization of extloads in r213897.

This commit fixes the regression and, as a side effect, also removes some
unnecessary shuffles.
In the newly attached testcase, we go from:
pshufw $-18, (%rdi), %mm0
movq %mm0, -8(%rsp)
movq -8(%rsp), %xmm0
pshufd $-44, %xmm0, %xmm0
movd %xmm0, %eax
...
To:
pshufw $-18, (%rdi), %mm0
movq2dq %mm0, %xmm0
movd %xmm0, %eax
...
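For illustration only (this is not the attached LLVM IR test), here is a rough
C++ analogue of that pattern using the intrinsics that map to the instructions
above: pshufw ($-18 is the immediate 0xEE), movq2dq, and movd. A minimal
sketch, assuming gcc/clang with -msse2; the function and variable names are
mine, not from the commit:

#include <xmmintrin.h>  // SSE:  _mm_shuffle_pi16 (pshufw)
#include <emmintrin.h>  // SSE2: _mm_movpi64_epi64 (movq2dq), _mm_cvtsi128_si32 (movd)
#include <cstdio>

// pshufw $0xee selects words (2,3,2,3) from the 64-bit MMX value.
int low32_after_shuffle(const __m64 *p) {
  __m64 m = _mm_shuffle_pi16(*p, 0xEE);  // pshufw $-18, (%rdi), %mm0
  __m128i x = _mm_movpi64_epi64(m);      // movq2dq %mm0, %xmm0 (no stack round-trip)
  return _mm_cvtsi128_si32(x);           // movd %xmm0, %eax
}

int main() {
  __m64 v = _mm_set_pi16(4, 3, 2, 1);    // words w3..w0 = 4,3,2,1
  int r = low32_after_shuffle(&v);
  _mm_empty();                           // emms: leave MMX state before any FP use
  std::printf("%#x\n", r);               // prints 0x40003 (w2=3 low, w3=4 high)
}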
Differential Revision: http://reviews.llvm.org/D7126
rdar://problem/19413324
llvm-svn: 226953
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 29
1 file changed, 29 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e92a099753b..37026ce0f12 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24757,6 +24757,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
   LoadSDNode *Ld = cast<LoadSDNode>(N);
   EVT RegVT = Ld->getValueType(0);
   EVT MemVT = Ld->getMemoryVT();
+  SDValue Ptr = Ld->getBasePtr();
+  SDValue Chain = Ld->getChain();
   SDLoc dl(Ld);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -24795,6 +24797,33 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
     return DCI.CombineTo(N, NewVec, TF, true);
   }
 
+  // Conversion from x86mmx/i64 to v2i64 types is often done via stack
+  // store/load. Under certain conditions we can bypass the memory access and
+  // combine this load to use a scalar_to_vector instead. This reduces stack
+  // use, avoids redundant shuffle emission, and creates isel matching
+  // candidates for movq2dq instructions.
+  if (RegVT == MVT::v2i64 && Subtarget->hasSSE2() && Ext == ISD::EXTLOAD &&
+      !Ld->isVolatile() && ISD::isNON_TRUNCStore(Chain.getNode())) {
+
+    // If this load is directly stored, get the original source value.
+    StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+    EVT SrcTy = PrevST->getValue().getValueType();
+    if (PrevST->getBasePtr() != Ptr ||
+        !(SrcTy == MVT::i64 || SrcTy == MVT::x86mmx))
+      return SDValue();
+    SDValue SrcVal = Chain.getOperand(1);
+
+    // On 32-bit systems we can't store 64-bit integers, so use f64 instead.
+    bool Usef64 = TLI.isTypeLegal(MVT::f64) && !Subtarget->is64Bit();
+    if (Usef64)
+      SrcVal = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SrcVal);
+    SrcVal = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+                         Usef64 ? MVT::v2f64 : RegVT, SrcVal);
+
+    return DCI.CombineTo(N, Usef64 ?
+                         DAG.getNode(ISD::BITCAST, dl, RegVT, SrcVal) : SrcVal,
+                         Chain);
+  }
+
   return SDValue();
 }
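The guard in this combine is essentially store-to-load forwarding restricted
to one pattern: a load whose chain is a non-truncating store to the same base
pointer. A standalone sketch of that idea, using toy stand-in types (nothing
below is LLVM API; all names are invented for illustration):

#include <cstdio>
#include <string>

// Toy stand-ins for the DAG nodes involved.
struct Value { std::string name; int bits; };
struct Store { const Value *src; int addr; bool truncating; };
struct Load  { const Store *chain; int addr; };

// The load only folds if its chain is a non-truncating store to the same
// address; then the stored value can be used directly, bypassing memory.
const Value *forwardStoredValue(const Load &ld) {
  const Store *st = ld.chain;
  if (!st || st->truncating || st->addr != ld.addr)
    return nullptr;  // pattern mismatch: keep the real load
  return st->src;    // bypass the stack slot
}

int main() {
  Value mm{"%mm0", 64};
  Store st{&mm, /*addr=*/-8, /*truncating=*/false};  // movq %mm0, -8(%rsp)
  Load  ld{&st, /*addr=*/-8};                        // movq -8(%rsp), %xmm0
  if (const Value *v = forwardStoredValue(ld))
    std::printf("forwarded %s (%d bits)\n", v->name.c_str(), v->bits);
}

The f64 detour in the real patch exists because i64 is not a legal scalar type
on 32-bit x86: the value is bitcast to f64 (legal with SSE2), widened to
v2f64 via scalar_to_vector, and bitcast back to v2i64.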