diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 8 |
-rw-r--r-- | llvm/test/CodeGen/X86/haddsub-3.ll | 66 |
2 files changed, 71 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d1e0f690913..f7c91ba0e65 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -32820,10 +32820,13 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) { // For a broadcast, peek through an extract element of index 0 to find the // horizontal op: broadcast (ext_vec_elt HOp, 0) + EVT VT = N->getValueType(0); if (Opcode == X86ISD::VBROADCAST) { SDValue SrcOp = N->getOperand(0); if (SrcOp.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - SrcOp.getValueType() == MVT::f64 && isNullConstant(SrcOp.getOperand(1))) + SrcOp.getValueType() == MVT::f64 && + SrcOp.getOperand(0).getValueType() == VT && + isNullConstant(SrcOp.getOperand(1))) N = SrcOp.getNode(); } @@ -32847,7 +32850,8 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) { // movddup (hadd X, X) --> hadd X, X // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X assert((HOp.getValueType() == MVT::v2f64 || - HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op"); + HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT && + "Unexpected type for h-op"); return HOp; } diff --git a/llvm/test/CodeGen/X86/haddsub-3.ll b/llvm/test/CodeGen/X86/haddsub-3.ll index b1406fc3417..29b9a626dd2 100644 --- a/llvm/test/CodeGen/X86/haddsub-3.ll +++ b/llvm/test/CodeGen/X86/haddsub-3.ll @@ -1,7 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 define float @pr26491(<4 x float> %a0) { 
; SSE2-LABEL: pr26491: @@ -37,3 +38,66 @@ define float @pr26491(<4 x float> %a0) { %5 = fadd float %3, %4 ret float %5 } + +; When simplifying away a splat (broadcast), the hop type must match the shuffle type. + +define <4 x double> @PR41414(i64 %x, <4 x double> %y) { +; SSE2-LABEL: PR41414: +; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1] +; SSE2-NEXT: subpd {{.*}}(%rip), %xmm2 +; SSE2-NEXT: movapd %xmm2, %xmm3 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1] +; SSE2-NEXT: addpd %xmm2, %xmm3 +; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0,0] +; SSE2-NEXT: divpd %xmm3, %xmm1 +; SSE2-NEXT: divpd %xmm3, %xmm0 +; SSE2-NEXT: xorpd %xmm2, %xmm2 +; SSE2-NEXT: addpd %xmm2, %xmm0 +; SSE2-NEXT: addpd %xmm2, %xmm1 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: PR41414: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movq %rdi, %xmm2 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[1],mem[1] +; SSSE3-NEXT: subpd {{.*}}(%rip), %xmm2 +; SSSE3-NEXT: haddpd %xmm2, %xmm2 +; SSSE3-NEXT: divpd %xmm2, %xmm1 +; SSSE3-NEXT: divpd %xmm2, %xmm0 +; SSSE3-NEXT: xorpd %xmm2, %xmm2 +; SSSE3-NEXT: addpd %xmm2, %xmm0 +; SSSE3-NEXT: addpd %xmm2, %xmm1 +; SSSE3-NEXT: retq +; +; AVX1-LABEL: PR41414: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovq %rdi, %xmm1 +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] +; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vhaddpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 +; AVX1-NEXT: vdivpd %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: PR41414: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovq %rdi, %xmm1 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] +; AVX2-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vhaddpd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1 +; AVX2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vxorpd 
%xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq + %conv = uitofp i64 %x to double + %t0 = insertelement <4 x double> undef, double %conv, i32 0 + %t1 = shufflevector <4 x double> %t0, <4 x double> undef, <4 x i32> zeroinitializer + %t2 = fdiv <4 x double> %y, %t1 + %t3 = fadd <4 x double> zeroinitializer, %t2 + ret <4 x double> %t3 +} |