[DAGCombiner] allow undef shuffle operands when eliminating bitcasts (PR34111)

As noted in the FIXME, this could be improved more, but this is the smallest fix that helps:
https://bugs.llvm.org/show_bug.cgi?id=34111

llvm-svn: 311853
author: Sanjay Patel <spatel@rotateright.com> 2017-08-27 17:29:30 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2017-08-27 17:29:30 +0000
commit: a7a61d97687af958346b24125d08cd1d886201db (patch)
tree: eb81f96e3ee17233ce3d30404cf38bf3246575c5
parent: 4e4ba615b25372eb56d7fa76bf88dc16d9d34a98 (diff)
download: bcm5719-llvm-a7a61d97687af958346b24125d08cd1d886201db.tar.gz
bcm5719-llvm-a7a61d97687af958346b24125d08cd1d886201db.zip
2 files changed, 6 insertions, 8 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6e106bb869a..df38ef423fa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8863,12 +8863,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
       if (Op.getOpcode() == ISD::BITCAST &&
           Op.getOperand(0).getValueType() == VT)
         return SDValue(Op.getOperand(0));
-      if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+      if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
         return DAG.getBitcast(VT, Op);
       return SDValue();
     };
 
+    // FIXME: If either input vector is bitcast, try to convert the shuffle to
+    // the result type of this bitcast. This would eliminate at least one
+    // bitcast. See the transform in InstCombine.
     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
     if (!(SV0 && SV1))
diff --git a/llvm/test/CodeGen/X86/haddsub.ll b/llvm/test/CodeGen/X86/haddsub.ll
index b63d97df52a..7fbd4169a26 100644
--- a/llvm/test/CodeGen/X86/haddsub.ll
+++ b/llvm/test/CodeGen/X86/haddsub.ll
@@ -401,18 +401,13 @@ define <2 x float> @haddps_v2f32(<4 x float> %v0) {
 define <4 x float> @PR34111(<4 x float> %a) {
 ; SSE3-LABEL: PR34111:
 ; SSE3:       # BB#0:
-; SSE3-NEXT:    movaps %xmm0, %xmm1
-; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; SSE3-NEXT:    addps %xmm1, %xmm0
+; SSE3-NEXT:    haddps %xmm0, %xmm0
 ; SSE3-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
 ; SSE3-NEXT:    retq
 ;
 ; AVX-LABEL: PR34111:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
-; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; AVX-NEXT:    vaddps %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 ; AVX-NEXT:    retq
   %a02 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 2>
author	Sanjay Patel <spatel@rotateright.com>	2017-08-27 17:29:30 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2017-08-27 17:29:30 +0000
commit	a7a61d97687af958346b24125d08cd1d886201db (patch)
tree	eb81f96e3ee17233ce3d30404cf38bf3246575c5
parent	4e4ba615b25372eb56d7fa76bf88dc16d9d34a98 (diff)
download	bcm5719-llvm-a7a61d97687af958346b24125d08cd1d886201db.tar.gz bcm5719-llvm-a7a61d97687af958346b24125d08cd1d886201db.zip