-rw-r--r--   llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp    58
-rw-r--r--   llvm/test/CodeGen/X86/scalarize-fp.ll           112
2 files changed, 98 insertions, 72 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c61b24f8d21..6424f30b310 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18675,6 +18675,61 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
return SDValue();
}
+/// If a vector binop is performed on build vector operands that only have one
+/// non-undef element, it may be profitable to extract, scalarize, and insert.
+static SDValue scalarizeBinOpOfBuildVectors(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0.getOpcode() != ISD::BUILD_VECTOR || N0.getOpcode() != N1.getOpcode())
+ return SDValue();
+
+ // Return the index of exactly one scalar element in an otherwise undefined
+ // build vector.
+ auto getScalarIndex = [](SDValue V) {
+ int NotUndefIndex = -1;
+ for (unsigned i = 0, e = V.getNumOperands(); i != e; ++i) {
+ // Ignore undef elements.
+ if (V.getOperand(i).isUndef())
+ continue;
+ // There can be only one.
+ if (NotUndefIndex >= 0)
+ return -1;
+ // This might be the only non-undef operand.
+ NotUndefIndex = i;
+ }
+ return NotUndefIndex;
+ };
+ int N0Index = getScalarIndex(N0);
+ if (N0Index == -1)
+ return SDValue();
+ int N1Index = getScalarIndex(N1);
+ if (N1Index == -1)
+ return SDValue();
+
+ SDValue X = N0.getOperand(N0Index);
+ SDValue Y = N1.getOperand(N1Index);
+ EVT ScalarVT = X.getValueType();
+ if (ScalarVT != Y.getValueType())
+ return SDValue();
+
+ // TODO: Remove/replace the extract cost check? If the elements are available
+ // as scalars, then there may be no extract cost. Should we ask if
+ // inserting a scalar back into a vector is cheap instead?
+ EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (N0Index != N1Index || !TLI.isExtractVecEltCheap(VT, N0Index) ||
+ !TLI.isOperationLegalOrCustom(N->getOpcode(), ScalarVT))
+ return SDValue();
+
+ // bo (build_vec ...undef, x, undef...), (build_vec ...undef, y, undef...) -->
+ // build_vec ...undef, (bo x, y), undef...
+ SDValue ScalarBO = DAG.getNode(N->getOpcode(), SDLoc(N), ScalarVT, X, Y,
+ N->getFlags());
+ SmallVector<SDValue, 8> Ops(N0.getNumOperands(), DAG.getUNDEF(ScalarVT));
+ Ops[N0Index] = ScalarBO;
+ return DAG.getBuildVector(VT, SDLoc(N), Ops);
+}
+
/// Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
assert(N->getValueType(0).isVector() &&
@@ -18737,6 +18792,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
}
+ if (SDValue V = scalarizeBinOpOfBuildVectors(N, DAG))
+ return V;
+
return SDValue();
}
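
(Illustrative sketch, not part of the commit.) The new combine fires when both operands of a vector binop are build vectors with a single defined lane at the same index; the operation is then done on the two scalars and the result rebuilt with undef in the remaining lanes. A minimal IR example of that shape, using a hypothetical function name, might look like this; the tests below exercise the variable-with-constant variants of the same pattern:

define <4 x float> @scalarize_example(float %x, float %y) nounwind {
  ; Both operands define only lane 0, so the fadd should be performable as a
  ; scalar fadd (e.g. addss on x86) with the other lanes left undef.
  %vx = insertelement <4 x float> undef, float %x, i32 0
  %vy = insertelement <4 x float> undef, float %y, i32 0
  %b = fadd <4 x float> %vx, %vy
  ret <4 x float> %b
}
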
diff --git a/llvm/test/CodeGen/X86/scalarize-fp.ll b/llvm/test/CodeGen/X86/scalarize-fp.ll
index fb665e928e2..40eed1ff38c 100644
--- a/llvm/test/CodeGen/X86/scalarize-fp.ll
+++ b/llvm/test/CodeGen/X86/scalarize-fp.ll
@@ -5,14 +5,12 @@
define <4 x float> @fadd_op1_constant_v4f32(float %x) nounwind {
; SSE-LABEL: fadd_op1_constant_v4f32:
; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: addps %xmm1, %xmm0
+; SSE-NEXT: addss {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fadd_op1_constant_v4f32:
; AVX: # %bb.0:
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x float> undef, float %x, i32 0
%b = fadd <4 x float> %v, <float 42.0, float undef, float undef, float undef>
@@ -22,16 +20,14 @@ define <4 x float> @fadd_op1_constant_v4f32(float %x) nounwind {
define <4 x float> @load_fadd_op1_constant_v4f32(float* %p) nounwind {
; SSE-LABEL: load_fadd_op1_constant_v4f32:
; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: addps %xmm1, %xmm0
+; SSE-NEXT: addss {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: load_fadd_op1_constant_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%x = load float, float* %p
%v = insertelement <4 x float> undef, float %x, i32 0
@@ -43,14 +39,14 @@ define <4 x float> @fsub_op0_constant_v4f32(float %x) nounwind {
; SSE-LABEL: fsub_op0_constant_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: subps %xmm0, %xmm1
+; SSE-NEXT: subss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fsub_op0_constant_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vsubps %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x float> undef, float %x, i32 0
%b = fsub <4 x float> <float 42.0, float undef, float undef, float undef>, %v
@@ -60,16 +56,14 @@ define <4 x float> @fsub_op0_constant_v4f32(float %x) nounwind {
define <4 x float> @load_fsub_op0_constant_v4f32(float* %p) nounwind {
; SSE-LABEL: load_fsub_op0_constant_v4f32:
; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: subps %xmm1, %xmm0
+; SSE-NEXT: subss (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: load_fsub_op0_constant_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vsubps %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
%x = load float, float* %p
%v = insertelement <4 x float> undef, float %x, i32 0
@@ -80,14 +74,12 @@ define <4 x float> @load_fsub_op0_constant_v4f32(float* %p) nounwind {
define <4 x float> @fmul_op1_constant_v4f32(float %x) nounwind {
; SSE-LABEL: fmul_op1_constant_v4f32:
; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: mulps %xmm1, %xmm0
+; SSE-NEXT: mulss {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fmul_op1_constant_v4f32:
; AVX: # %bb.0:
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x float> undef, float %x, i32 0
%b = fmul <4 x float> %v, <float 42.0, float undef, float undef, float undef>
@@ -97,16 +89,14 @@ define <4 x float> @fmul_op1_constant_v4f32(float %x) nounwind {
define <4 x float> @load_fmul_op1_constant_v4f32(float* %p) nounwind {
; SSE-LABEL: load_fmul_op1_constant_v4f32:
; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: mulps %xmm1, %xmm0
+; SSE-NEXT: mulss {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: load_fmul_op1_constant_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%x = load float, float* %p
%v = insertelement <4 x float> undef, float %x, i32 0
@@ -117,14 +107,12 @@ define <4 x float> @load_fmul_op1_constant_v4f32(float* %p) nounwind {
define <4 x float> @fdiv_op1_constant_v4f32(float %x) nounwind {
; SSE-LABEL: fdiv_op1_constant_v4f32:
; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: divps %xmm1, %xmm0
+; SSE-NEXT: divss {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fdiv_op1_constant_v4f32:
; AVX: # %bb.0:
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vdivps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vdivss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x float> undef, float %x, i32 0
%b = fdiv <4 x float> %v, <float 42.0, float undef, float undef, float undef>
@@ -135,15 +123,13 @@ define <4 x float> @load_fdiv_op1_constant_v4f32(float* %p) nounwind {
; SSE-LABEL: load_fdiv_op1_constant_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: divps %xmm1, %xmm0
+; SSE-NEXT: divss {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: load_fdiv_op1_constant_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vdivps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vdivss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%x = load float, float* %p
%v = insertelement <4 x float> undef, float %x, i32 0
@@ -155,14 +141,14 @@ define <4 x float> @fdiv_op0_constant_v4f32(float %x) nounwind {
; SSE-LABEL: fdiv_op0_constant_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: divps %xmm0, %xmm1
+; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fdiv_op0_constant_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x float> undef, float %x, i32 0
%b = fdiv <4 x float> <float 42.0, float undef, float undef, float undef>, %v
@@ -172,16 +158,14 @@ define <4 x float> @fdiv_op0_constant_v4f32(float %x) nounwind {
define <4 x float> @load_fdiv_op0_constant_v4f32(float* %p) nounwind {
; SSE-LABEL: load_fdiv_op0_constant_v4f32:
; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: divps %xmm1, %xmm0
+; SSE-NEXT: divss (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: load_fdiv_op0_constant_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
%x = load float, float* %p
%v = insertelement <4 x float> undef, float %x, i32 0
@@ -192,14 +176,12 @@ define <4 x float> @load_fdiv_op0_constant_v4f32(float* %p) nounwind {
define <4 x double> @fadd_op1_constant_v4f64(double %x) nounwind {
; SSE-LABEL: fadd_op1_constant_v4f64:
; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: addpd %xmm1, %xmm0
+; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fadd_op1_constant_v4f64:
; AVX: # %bb.0:
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x double> undef, double %x, i32 0
%b = fadd <4 x double> %v, <double 42.0, double undef, double undef, double undef>
@@ -209,16 +191,14 @@ define <4 x double> @fadd_op1_constant_v4f64(double %x) nounwind {
define <4 x double> @load_fadd_op1_constant_v4f64(double* %p) nounwind {
; SSE-LABEL: load_fadd_op1_constant_v4f64:
; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: addpd %xmm1, %xmm0
+; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: load_fadd_op1_constant_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%x = load double, double* %p
%v = insertelement <4 x double> undef, double %x, i32 0
@@ -230,14 +210,14 @@ define <4 x double> @fsub_op0_constant_v4f64(double %x) nounwind {
; SSE-LABEL: fsub_op0_constant_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: subpd %xmm0, %xmm1
+; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fsub_op0_constant_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vsubpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x double> undef, double %x, i32 0
%b = fsub <4 x double> <double 42.0, double undef, double undef, double undef>, %v
@@ -247,16 +227,14 @@ define <4 x double> @fsub_op0_constant_v4f64(double %x) nounwind {
define <4 x double> @load_fsub_op0_constant_v4f64(double* %p) nounwind {
; SSE-LABEL: load_fsub_op0_constant_v4f64:
; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: subpd %xmm1, %xmm0
+; SSE-NEXT: subsd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: load_fsub_op0_constant_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vsubpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vsubsd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
%x = load double, double* %p
%v = insertelement <4 x double> undef, double %x, i32 0
@@ -267,14 +245,12 @@ define <4 x double> @load_fsub_op0_constant_v4f64(double* %p) nounwind {
define <4 x double> @fmul_op1_constant_v4f64(double %x) nounwind {
; SSE-LABEL: fmul_op1_constant_v4f64:
; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: mulpd %xmm1, %xmm0
+; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fmul_op1_constant_v4f64:
; AVX: # %bb.0:
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x double> undef, double %x, i32 0
%b = fmul <4 x double> %v, <double 42.0, double undef, double undef, double undef>
@@ -284,16 +260,14 @@ define <4 x double> @fmul_op1_constant_v4f64(double %x) nounwind {
define <4 x double> @load_fmul_op1_constant_v4f64(double* %p) nounwind {
; SSE-LABEL: load_fmul_op1_constant_v4f64:
; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: mulpd %xmm1, %xmm0
+; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: load_fmul_op1_constant_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%x = load double, double* %p
%v = insertelement <4 x double> undef, double %x, i32 0
@@ -304,14 +278,12 @@ define <4 x double> @load_fmul_op1_constant_v4f64(double* %p) nounwind {
define <4 x double> @fdiv_op1_constant_v4f64(double %x) nounwind {
; SSE-LABEL: fdiv_op1_constant_v4f64:
; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: divpd %xmm1, %xmm0
+; SSE-NEXT: divsd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fdiv_op1_constant_v4f64:
; AVX: # %bb.0:
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vdivpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vdivsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x double> undef, double %x, i32 0
%b = fdiv <4 x double> %v, <double 42.0, double undef, double undef, double undef>
@@ -322,15 +294,13 @@ define <4 x double> @load_fdiv_op1_constant_v4f64(double* %p) nounwind {
; SSE-LABEL: load_fdiv_op1_constant_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: divpd %xmm1, %xmm0
+; SSE-NEXT: divsd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: load_fdiv_op1_constant_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vdivpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vdivsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%x = load double, double* %p
%v = insertelement <4 x double> undef, double %x, i32 0
@@ -342,14 +312,14 @@ define <4 x double> @fdiv_op0_constant_v4f64(double %x) nounwind {
; SSE-LABEL: fdiv_op0_constant_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: divpd %xmm0, %xmm1
+; SSE-NEXT: divsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fdiv_op0_constant_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vdivpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x double> undef, double %x, i32 0
%b = fdiv <4 x double> <double 42.0, double undef, double undef, double undef>, %v
@@ -359,16 +329,14 @@ define <4 x double> @fdiv_op0_constant_v4f64(double %x) nounwind {
define <4 x double> @load_fdiv_op0_constant_v4f64(double* %p) nounwind {
; SSE-LABEL: load_fdiv_op0_constant_v4f64:
; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: divpd %xmm1, %xmm0
+; SSE-NEXT: divsd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: load_fdiv_op0_constant_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vdivpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vdivsd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
%x = load double, double* %p
%v = insertelement <4 x double> undef, double %x, i32 0