-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  58 |
-rw-r--r-- | llvm/test/CodeGen/X86/scalarize-fp.ll         | 112 |
2 files changed, 98 insertions, 72 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c61b24f8d21..6424f30b310 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18675,6 +18675,61 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
   return SDValue();
 }

+/// If a vector binop is performed on build vector operands that only have one
+/// non-undef element, it may be profitable to extract, scalarize, and insert.
+static SDValue scalarizeBinOpOfBuildVectors(SDNode *N, SelectionDAG &DAG) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  if (N0.getOpcode() != ISD::BUILD_VECTOR || N0.getOpcode() != N1.getOpcode())
+    return SDValue();
+
+  // Return the index of exactly one scalar element in an otherwise undefined
+  // build vector.
+  auto getScalarIndex = [](SDValue V) {
+    int NotUndefIndex = -1;
+    for (unsigned i = 0, e = V.getNumOperands(); i != e; ++i) {
+      // Ignore undef elements.
+      if (V.getOperand(i).isUndef())
+        continue;
+      // There can be only one.
+      if (NotUndefIndex >= 0)
+        return -1;
+      // This might be the only non-undef operand.
+      NotUndefIndex = i;
+    }
+    return NotUndefIndex;
+  };
+  int N0Index = getScalarIndex(N0);
+  if (N0Index == -1)
+    return SDValue();
+  int N1Index = getScalarIndex(N1);
+  if (N1Index == -1)
+    return SDValue();
+
+  SDValue X = N0.getOperand(N0Index);
+  SDValue Y = N1.getOperand(N1Index);
+  EVT ScalarVT = X.getValueType();
+  if (ScalarVT != Y.getValueType())
+    return SDValue();
+
+  // TODO: Remove/replace the extract cost check? If the elements are available
+  //       as scalars, then there may be no extract cost. Should we ask if
+  //       inserting a scalar back into a vector is cheap instead?
+  EVT VT = N->getValueType(0);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (N0Index != N1Index || !TLI.isExtractVecEltCheap(VT, N0Index) ||
+      !TLI.isOperationLegalOrCustom(N->getOpcode(), ScalarVT))
+    return SDValue();
+
+  // bo (build_vec ...undef, x, undef...), (build_vec ...undef, y, undef...) -->
+  //   build_vec ...undef, (bo x, y), undef...
+  SDValue ScalarBO = DAG.getNode(N->getOpcode(), SDLoc(N), ScalarVT, X, Y,
+                                 N->getFlags());
+  SmallVector<SDValue, 8> Ops(N0.getNumOperands(), DAG.getUNDEF(ScalarVT));
+  Ops[N0Index] = ScalarBO;
+  return DAG.getBuildVector(VT, SDLoc(N), Ops);
+}
+
 /// Visit a binary vector operation, like ADD.
 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
   assert(N->getValueType(0).isVector() &&
@@ -18737,6 +18792,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
     }
   }

+  if (SDValue V = scalarizeBinOpOfBuildVectors(N, DAG))
+    return V;
+
   return SDValue();
 }

diff --git a/llvm/test/CodeGen/X86/scalarize-fp.ll b/llvm/test/CodeGen/X86/scalarize-fp.ll
index fb665e928e2..40eed1ff38c 100644
--- a/llvm/test/CodeGen/X86/scalarize-fp.ll
+++ b/llvm/test/CodeGen/X86/scalarize-fp.ll
@@ -5,14 +5,12 @@
 define <4 x float> @fadd_op1_constant_v4f32(float %x) nounwind {
 ; SSE-LABEL: fadd_op1_constant_v4f32:
 ; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: addps %xmm1, %xmm0
+; SSE-NEXT: addss {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: fadd_op1_constant_v4f32:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %v = insertelement <4 x float> undef, float %x, i32 0
   %b = fadd <4 x float> %v, <float 42.0, float undef, float undef, float undef>
@@ -22,16 +20,14 @@ define <4 x float> @fadd_op1_constant_v4f32(float %x) nounwind {
 define <4 x float> @load_fadd_op1_constant_v4f32(float* %p) nounwind {
 ; SSE-LABEL: load_fadd_op1_constant_v4f32:
 ; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: addps %xmm1, %xmm0
+; SSE-NEXT: addss {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: load_fadd_op1_constant_v4f32:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %x = load float, float* %p
   %v = insertelement <4 x float> undef, float %x, i32 0
@@ -43,14 +39,14 @@ define <4 x float> @fsub_op0_constant_v4f32(float %x) nounwind {
 ; SSE-LABEL: fsub_op0_constant_v4f32:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: subps %xmm0, %xmm1
+; SSE-NEXT: subss %xmm0, %xmm1
 ; SSE-NEXT: movaps %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: fsub_op0_constant_v4f32:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vsubps %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
 ; AVX-NEXT: retq
   %v = insertelement <4 x float> undef, float %x, i32 0
   %b = fsub <4 x float> <float 42.0, float undef, float undef, float undef>, %v
@@ -60,16 +56,14 @@
 define <4 x float> @load_fsub_op0_constant_v4f32(float* %p) nounwind {
 ; SSE-LABEL: load_fsub_op0_constant_v4f32:
 ; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: subps %xmm1, %xmm0
+; SSE-NEXT: subss (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: load_fsub_op0_constant_v4f32:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vsubps %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vsubss (%rdi), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %x = load float, float* %p
   %v = insertelement <4 x float> undef, float %x, i32 0
@@ -80,14 +74,12 @@ define <4 x float> @load_fsub_op0_constant_v4f32(float* %p) nounwind {
 define <4 x float> @fmul_op1_constant_v4f32(float %x) nounwind {
 ; SSE-LABEL: fmul_op1_constant_v4f32:
 ; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: mulps %xmm1, %xmm0
+; SSE-NEXT: mulss {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: fmul_op1_constant_v4f32:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %v = insertelement <4 x float> undef, float %x, i32 0
   %b = fmul <4 x float> %v, <float 42.0, float undef, float undef, float undef>
@@ -97,16 +89,14 @@ define <4 x float> @fmul_op1_constant_v4f32(float %x) nounwind {
 define <4 x float> @load_fmul_op1_constant_v4f32(float* %p) nounwind {
 ; SSE-LABEL: load_fmul_op1_constant_v4f32:
 ; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: mulps %xmm1, %xmm0
+; SSE-NEXT: mulss {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: load_fmul_op1_constant_v4f32:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %x = load float, float* %p
   %v = insertelement <4 x float> undef, float %x, i32 0
@@ -117,14 +107,12 @@ define <4 x float> @load_fmul_op1_constant_v4f32(float* %p) nounwind {
 define <4 x float> @fdiv_op1_constant_v4f32(float %x) nounwind {
 ; SSE-LABEL: fdiv_op1_constant_v4f32:
 ; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: divps %xmm1, %xmm0
+; SSE-NEXT: divss {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: fdiv_op1_constant_v4f32:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vdivps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vdivss {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %v = insertelement <4 x float> undef, float %x, i32 0
   %b = fdiv <4 x float> %v, <float 42.0, float undef, float undef, float undef>
@@ -135,15 +123,13 @@ define <4 x float> @load_fdiv_op1_constant_v4f32(float* %p) nounwind {
 ; SSE-LABEL: load_fdiv_op1_constant_v4f32:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: divps %xmm1, %xmm0
+; SSE-NEXT: divss {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: load_fdiv_op1_constant_v4f32:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vdivps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vdivss {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %x = load float, float* %p
   %v = insertelement <4 x float> undef, float %x, i32 0
@@ -155,14 +141,14 @@ define <4 x float> @fdiv_op0_constant_v4f32(float %x) nounwind {
 ; SSE-LABEL: fdiv_op0_constant_v4f32:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: divps %xmm0, %xmm1
+; SSE-NEXT: divss %xmm0, %xmm1
 ; SSE-NEXT: movaps %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: fdiv_op0_constant_v4f32:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
 ; AVX-NEXT: retq
   %v = insertelement <4 x float> undef, float %x, i32 0
   %b = fdiv <4 x float> <float 42.0, float undef, float undef, float undef>, %v
@@ -172,16 +158,14 @@
 define <4 x float> @load_fdiv_op0_constant_v4f32(float* %p) nounwind {
 ; SSE-LABEL: load_fdiv_op0_constant_v4f32:
 ; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: divps %xmm1, %xmm0
+; SSE-NEXT: divss (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: load_fdiv_op0_constant_v4f32:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vdivss (%rdi), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %x = load float, float* %p
   %v = insertelement <4 x float> undef, float %x, i32 0
@@ -192,14 +176,12 @@
 define <4 x double> @fadd_op1_constant_v4f64(double %x) nounwind {
 ; SSE-LABEL: fadd_op1_constant_v4f64:
 ; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: addpd %xmm1, %xmm0
+; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: fadd_op1_constant_v4f64:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %v = insertelement <4 x double> undef, double %x, i32 0
   %b = fadd <4 x double> %v, <double 42.0, double undef, double undef, double undef>
@@ -209,16 +191,14 @@
 define <4 x double> @load_fadd_op1_constant_v4f64(double* %p) nounwind {
 ; SSE-LABEL: load_fadd_op1_constant_v4f64:
 ; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: addpd %xmm1, %xmm0
+; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: load_fadd_op1_constant_v4f64:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %x = load double, double* %p
   %v = insertelement <4 x double> undef, double %x, i32 0
@@ -230,14 +210,14 @@ define <4 x double> @fsub_op0_constant_v4f64(double %x) nounwind {
 ; SSE-LABEL: fsub_op0_constant_v4f64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: subpd %xmm0, %xmm1
+; SSE-NEXT: subsd %xmm0, %xmm1
 ; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: fsub_op0_constant_v4f64:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vsubpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT: retq
   %v = insertelement <4 x double> undef, double %x, i32 0
   %b = fsub <4 x double> <double 42.0, double undef, double undef, double undef>, %v
@@ -247,16 +227,14 @@
 define <4 x double> @load_fsub_op0_constant_v4f64(double* %p) nounwind {
 ; SSE-LABEL: load_fsub_op0_constant_v4f64:
 ; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: subpd %xmm1, %xmm0
+; SSE-NEXT: subsd (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: load_fsub_op0_constant_v4f64:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vsubpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vsubsd (%rdi), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %x = load double, double* %p
   %v = insertelement <4 x double> undef, double %x, i32 0
@@ -267,14 +245,12 @@ define <4 x double> @load_fsub_op0_constant_v4f64(double* %p) nounwind {
 define <4 x double> @fmul_op1_constant_v4f64(double %x) nounwind {
 ; SSE-LABEL: fmul_op1_constant_v4f64:
 ; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: mulpd %xmm1, %xmm0
+; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: fmul_op1_constant_v4f64:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %v = insertelement <4 x double> undef, double %x, i32 0
   %b = fmul <4 x double> %v, <double 42.0, double undef, double undef, double undef>
@@ -284,16 +260,14 @@ define <4 x double> @fmul_op1_constant_v4f64(double %x) nounwind {
 define <4 x double> @load_fmul_op1_constant_v4f64(double* %p) nounwind {
 ; SSE-LABEL: load_fmul_op1_constant_v4f64:
 ; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: mulpd %xmm1, %xmm0
+; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: load_fmul_op1_constant_v4f64:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %x = load double, double* %p
   %v = insertelement <4 x double> undef, double %x, i32 0
@@ -304,14 +278,12 @@ define <4 x double> @load_fmul_op1_constant_v4f64(double* %p) nounwind {
 define <4 x double> @fdiv_op1_constant_v4f64(double %x) nounwind {
 ; SSE-LABEL: fdiv_op1_constant_v4f64:
 ; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: divpd %xmm1, %xmm0
+; SSE-NEXT: divsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: fdiv_op1_constant_v4f64:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vdivpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vdivsd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %v = insertelement <4 x double> undef, double %x, i32 0
   %b = fdiv <4 x double> %v, <double 42.0, double undef, double undef, double undef>
@@ -322,15 +294,13 @@ define <4 x double> @load_fdiv_op1_constant_v4f64(double* %p) nounwind {
 ; SSE-LABEL: load_fdiv_op1_constant_v4f64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: divpd %xmm1, %xmm0
+; SSE-NEXT: divsd {{.*}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: load_fdiv_op1_constant_v4f64:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vdivpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vdivsd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %x = load double, double* %p
   %v = insertelement <4 x double> undef, double %x, i32 0
@@ -342,14 +312,14 @@ define <4 x double> @fdiv_op0_constant_v4f64(double %x) nounwind {
 ; SSE-LABEL: fdiv_op0_constant_v4f64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: divpd %xmm0, %xmm1
+; SSE-NEXT: divsd %xmm0, %xmm1
 ; SSE-NEXT: movapd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: fdiv_op0_constant_v4f64:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vdivpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT: retq
   %v = insertelement <4 x double> undef, double %x, i32 0
   %b = fdiv <4 x double> <double 42.0, double undef, double undef, double undef>, %v
@@ -359,16 +329,14 @@
 define <4 x double> @load_fdiv_op0_constant_v4f64(double* %p) nounwind {
 ; SSE-LABEL: load_fdiv_op0_constant_v4f64:
 ; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: divpd %xmm1, %xmm0
+; SSE-NEXT: divsd (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: load_fdiv_op0_constant_v4f64:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vdivpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vdivsd (%rdi), %xmm0, %xmm0
 ; AVX-NEXT: retq
   %x = load double, double* %p
   %v = insertelement <4 x double> undef, double %x, i32 0
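
Illustration (not part of the commit itself): the new combine fires on a vector binop whose operands are build vectors with a single non-undef lane. The first test above is such a case; it is reproduced here with its ret instruction added for completeness, since the trailing line is not visible in the diff context:

define <4 x float> @fadd_op1_constant_v4f32(float %x) nounwind {
  %v = insertelement <4 x float> undef, float %x, i32 0
  %b = fadd <4 x float> %v, <float 42.0, float undef, float undef, float undef>
  ret <4 x float> %b
}

Because only lane 0 of each operand is defined, the combine rewrites the vector fadd as a scalar fadd of %x and 42.0 followed by a build_vector. On x86 this lets the backend fold the constant load into addss/vaddss instead of materializing it with movss and using addps/vaddps, which is exactly the change in the updated CHECK lines.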