 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 25 ++++++++++++++++++++++++-
 llvm/test/CodeGen/X86/avx512-hadd-hsub.ll     | 12 ++++++------
 llvm/test/CodeGen/X86/scalarize-fp.ll         | 25 ++++++++++---------------
 llvm/test/CodeGen/X86/vector-partial-undef.ll | 10 ++++------
 4 files changed, 44 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3437f0debae..b440bbe29fd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18174,6 +18174,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Ops[] = {LHS, RHS};
+  EVT VT = N->getValueType(0);

  // See if we can constant fold the vector operation.
  if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
@@ -18191,7 +18192,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);

    if (SVN0->getMask().equals(SVN1->getMask())) {
-      EVT VT = N->getValueType(0);
      SDValue UndefVector = LHS.getOperand(1);
      SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                     LHS.getOperand(0), RHS.getOperand(0),
@@ -18202,6 +18202,29 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
    }
  }

+  // The following pattern is likely to emerge with vector reduction ops. Moving
+  // the binary operation ahead of insertion may allow using a narrower vector
+  // instruction that has better performance than the wide version of the op:
+  // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
+  if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
+      RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
+      LHS.getOperand(2) == RHS.getOperand(2) &&
+      (LHS.hasOneUse() || RHS.hasOneUse())) {
+    SDValue X = LHS.getOperand(1);
+    SDValue Y = RHS.getOperand(1);
+    SDValue Z = LHS.getOperand(2);
+    EVT NarrowVT = X.getValueType();
+    if (NarrowVT == Y.getValueType() &&
+        TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), NarrowVT)) {
+      // (binop undef, undef) may not return undef, so compute that result.
+      SDLoc DL(N);
+      SDValue VecC = DAG.getNode(N->getOpcode(), DL, VT, DAG.getUNDEF(VT),
+                                 DAG.getUNDEF(VT));
+      SDValue NarrowBO = DAG.getNode(N->getOpcode(), DL, NarrowVT, X, Y);
+      return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
+    }
+  }
+
  return SDValue();
}

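The effect of the new combine is easiest to see in IR. Below is a minimal sketch (the function name is hypothetical; the body is modeled directly on the xor_insert_insert test updated later in this commit): both operands are widened from v2i64 with undef upper lanes, which the DAG represents, roughly, as an INSERT_SUBVECTOR into an undef vector, so the combine shrinks the 256-bit xor to 128 bits:

define <4 x i64> @narrow_binop_example(<2 x i64> %x, <2 x i64> %y) {
  ; the upper two lanes of each widened value are undef
  %xw = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %yw = shufflevector <2 x i64> %y, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ; previously compiled to a 256-bit vxorps; now a 128-bit vxorps
  %r = xor <4 x i64> %xw, %yw
  ret <4 x i64> %r
}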
diff --git a/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll b/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
index 00063521c6d..0979657e0f6 100644
--- a/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
+++ b/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
@@ -111,14 +111,14 @@ define <16 x i32> @hadd_16_3(<16 x i32> %x225, <16 x i32> %x227) {
; KNL: # %bb.0:
; KNL-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; KNL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
-; KNL-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; KNL-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: hadd_16_3:
; SKX: # %bb.0:
; SKX-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
-; SKX-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; SKX-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; SKX-NEXT: retq
%x226 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18, i32 4, i32 6, i32 20, i32 22, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%x228 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19, i32 5, i32 7, i32 21, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -134,14 +134,14 @@ define <16 x float> @fhadd_16_3(<16 x float> %x225, <16 x float> %x227) {
; KNL: # %bb.0:
; KNL-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; KNL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
-; KNL-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; KNL-NEXT: vaddps %ymm0, %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: fhadd_16_3:
; SKX: # %bb.0:
; SKX-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
-; SKX-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; SKX-NEXT: vaddps %ymm0, %ymm2, %ymm0
; SKX-NEXT: retq
%x226 = shufflevector <16 x float> %x225, <16 x float> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18, i32 4, i32 6, i32 20, i32 22, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%x228 = shufflevector <16 x float> %x225, <16 x float> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19, i32 5, i32 7, i32 21, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -156,14 +156,14 @@ define <8 x double> @fhadd_16_4(<8 x double> %x225, <8 x double> %x227) {
; KNL: # %bb.0:
; KNL-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; KNL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; KNL-NEXT: vaddpd %zmm0, %zmm2, %zmm0
+; KNL-NEXT: vaddpd %ymm0, %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: fhadd_16_4:
; SKX: # %bb.0:
; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; SKX-NEXT: vaddpd %zmm0, %zmm2, %zmm0
+; SKX-NEXT: vaddpd %ymm0, %ymm2, %ymm0
; SKX-NEXT: retq
%x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
%x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 undef ,i32 undef, i32 undef, i32 undef>
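In these horizontal-op tests, the shufflevector masks leave the upper eight (or four) lanes undef, so after legalization each add operand is, roughly, an INSERT_SUBVECTOR of a 256-bit half into an undef 512-bit vector at index 0. A conceptual sketch of the nodes (hypothetical names, not actual -debug output):

  t0 = insert_subvector undef:v16i32, (v8i32 shuffle of even lanes), 0
  t1 = insert_subvector undef:v16i32, (v8i32 shuffle of odd lanes), 0
  add t0, t1  -->  insert_subvector (add undef, undef), (add v8i32 ...), 0

Both integer add and fadd of two undef operands fold to undef, so no extra instruction is needed to materialize VecC, and the zmm vpaddd/vaddps/vaddpd simply becomes its ymm form, as the updated CHECK lines show.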
diff --git a/llvm/test/CodeGen/X86/scalarize-fp.ll b/llvm/test/CodeGen/X86/scalarize-fp.ll
index d9606b87352..5cf526dbff5 100644
--- a/llvm/test/CodeGen/X86/scalarize-fp.ll
+++ b/llvm/test/CodeGen/X86/scalarize-fp.ll
@@ -198,9 +198,8 @@ define <4 x double> @fadd_op1_constant_v4f64(double %x) nounwind {
;
; AVX-LABEL: fadd_op1_constant_v4f64:
; AVX: # %bb.0:
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x double> undef, double %x, i32 0
%b = fadd <4 x double> %v, <double 42.0, double undef, double undef, double undef>
@@ -219,7 +218,7 @@ define <4 x double> @load_fadd_op1_constant_v4f64(double* %p) nounwind {
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%x = load double, double* %p
%v = insertelement <4 x double> undef, double %x, i32 0
@@ -237,9 +236,8 @@ define <4 x double> @fsub_op0_constant_v4f64(double %x) nounwind {
;
; AVX-LABEL: fsub_op0_constant_v4f64:
; AVX: # %bb.0:
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vsubpd %ymm0, %ymm1, %ymm0
+; AVX-NEXT: vsubpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x double> undef, double %x, i32 0
%b = fsub <4 x double> <double 42.0, double undef, double undef, double undef>, %v
@@ -258,7 +256,7 @@ define <4 x double> @load_fsub_op0_constant_v4f64(double* %p) nounwind {
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vsubpd %ymm0, %ymm1, %ymm0
+; AVX-NEXT: vsubpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%x = load double, double* %p
%v = insertelement <4 x double> undef, double %x, i32 0
@@ -275,9 +273,8 @@ define <4 x double> @fmul_op1_constant_v4f64(double %x) nounwind {
;
; AVX-LABEL: fmul_op1_constant_v4f64:
; AVX: # %bb.0:
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x double> undef, double %x, i32 0
%b = fmul <4 x double> %v, <double 42.0, double undef, double undef, double undef>
@@ -296,7 +293,7 @@ define <4 x double> @load_fmul_op1_constant_v4f64(double* %p) nounwind {
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%x = load double, double* %p
%v = insertelement <4 x double> undef, double %x, i32 0
@@ -313,9 +310,8 @@ define <4 x double> @fdiv_op1_constant_v4f64(double %x) nounwind {
;
; AVX-LABEL: fdiv_op1_constant_v4f64:
; AVX: # %bb.0:
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vdivpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vdivpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x double> undef, double %x, i32 0
%b = fdiv <4 x double> %v, <double 42.0, double undef, double undef, double undef>
@@ -334,7 +330,7 @@ define <4 x double> @load_fdiv_op1_constant_v4f64(double* %p) nounwind {
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vdivpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vdivpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%x = load double, double* %p
%v = insertelement <4 x double> undef, double %x, i32 0
@@ -352,9 +348,8 @@ define <4 x double> @fdiv_op0_constant_v4f64(double %x) nounwind {
;
; AVX-LABEL: fdiv_op0_constant_v4f64:
; AVX: # %bb.0:
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vdivpd %ymm0, %ymm1, %ymm0
+; AVX-NEXT: vdivpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x double> undef, double %x, i32 0
%b = fdiv <4 x double> <double 42.0, double undef, double undef, double undef>, %v
@@ -373,7 +368,7 @@ define <4 x double> @load_fdiv_op0_constant_v4f64(double* %p) nounwind {
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vdivpd %ymm0, %ymm1, %ymm0
+; AVX-NEXT: vdivpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%x = load double, double* %p
%v = insertelement <4 x double> undef, double %x, i32 0
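The deleted '# kill' comments are a side benefit of the same combine: they marked the scalar xmm argument as implicitly defining its full ymm super-register, which was only needed because the arithmetic used to be performed at 256 bits. Reading the first test above before and after (annotations mine):

  # before: the argument must be valid as ymm0 for a 256-bit add
  # kill: def $xmm0 killed $xmm0 def $ymm0
  vaddpd %ymm1, %ymm0, %ymm0
  # after: the add stays at 128 bits; the undef upper lanes are never touched
  vaddpd %xmm1, %xmm0, %xmm0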
diff --git a/llvm/test/CodeGen/X86/vector-partial-undef.ll b/llvm/test/CodeGen/X86/vector-partial-undef.ll
index 1cd3415d082..2b4ab11fea5 100644
--- a/llvm/test/CodeGen/X86/vector-partial-undef.ll
+++ b/llvm/test/CodeGen/X86/vector-partial-undef.ll
@@ -13,9 +13,7 @@ define <4 x i64> @xor_insert_insert(<2 x i64> %x, <2 x i64> %y) {
;
; AVX-LABEL: xor_insert_insert:
; AVX: # %bb.0:
-; AVX-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%xw = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%yw = shufflevector <2 x i64> %y, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -32,9 +30,9 @@ define <4 x i64> @xor_insert_insert_high_half(<2 x i64> %x, <2 x i64> %y) {
;
; AVX-LABEL: xor_insert_insert_high_half:
; AVX: # %bb.0:
-; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
-; AVX-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
%xw = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
%yw = shufflevector <2 x i64> %y, <2 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
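The high-half test makes the '(binop undef, undef) may not return undef' guard in the C++ visible: xor of two undef vectors constant-folds to all-zeros, so VecC is a zero vector, which the new output materializes with vxorps %xmm1, %xmm1, %xmm1 before inserting the narrow result into the upper 128 bits. A conceptual sketch of the combined nodes (names match the C++ above; not actual -debug output):

  VecC     = xor v4i64 undef, undef             ; folds to all-zeros
  NarrowBO = xor v2i64 X, Y                     ; the remaining 128-bit vxorps
  result   = insert_subvector VecC, NarrowBO, 2 ; the vinsertf128 $1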