-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp      |  6
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp   | 17
-rw-r--r--  llvm/test/CodeGen/X86/trunc-subvector.ll           |  2
-rw-r--r--  llvm/test/CodeGen/X86/vector-rotate-128.ll         | 40
-rw-r--r--  llvm/test/CodeGen/X86/vector-rotate-256.ll         | 39
-rw-r--r--  llvm/test/CodeGen/X86/vector-rotate-512.ll         | 26
6 files changed, 65 insertions(+), 65 deletions(-)
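
The functional change is the new SimplifyDemandedVectorElts case for ANY/SIGN/ZERO_EXTEND_VECTOR_INREG: those nodes only ever read the low elements of their wider source vector, so the demanded-element mask of the result can be zero-extended to the source's element count and pushed through to the operand. Below is a minimal standalone sketch of that mapping, not the LLVM API: the name demandedSrcForExtendInReg is invented for illustration and plain uint64_t bitmasks stand in for llvm::APInt.

// Sketch only: models the demanded-element propagation added in this patch.
// Bit i of a mask is set iff vector element i is demanded by some user.
#include <cstdint>
#include <cstdio>

// For dst = *_EXTEND_VECTOR_INREG(src), result element i is a widened copy of
// source element i, and source elements >= the result's element count are
// never read. The demanded mask therefore carries over bit-for-bit; the real
// code just widens the mask to NumSrcElts bits with APInt::zextOrSelf.
static uint64_t demandedSrcForExtendInReg(uint64_t DemandedDstElts,
                                          unsigned NumSrcElts) {
  uint64_t SrcWidthMask =
      NumSrcElts >= 64 ? ~0ull : (1ull << NumSrcElts) - 1;
  return DemandedDstElts & SrcWidthMask;
}

int main() {
  // v2i64 = zero_extend_vector_inreg(v8i16 shift amount): the scalar shift
  // lowering only reads result element 0, so only source element 0 matters.
  uint64_t DemandedDst = 0x1;
  uint64_t DemandedSrc =
      demandedSrcForExtendInReg(DemandedDst, /*NumSrcElts=*/8);
  std::printf("demanded v8i16 source lanes: 0x%llx\n",
              (unsigned long long)DemandedSrc); // prints 0x1: only lane 0
  // Lane 0 of a splat shuffle equals lane 0 of its input, so a splat with
  // only lane 0 demanded simplifies to its input; that is how the
  // vpbroadcastw/vpbroadcastd instructions disappear from the rotate tests.
  return 0;
}
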
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8ae012b3878..9f3ad172f0e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9461,6 +9461,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -9474,6 +9477,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
+ if (SimplifyDemandedVectorElts(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3ad71d5677e..ff00e6c2590 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1736,6 +1736,20 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
break;
}
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ APInt SrcUndef, SrcZero;
+ SDValue Src = Op.getOperand(0);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef,
+ SrcZero, TLO, Depth + 1))
+ return true;
+ KnownZero = SrcZero.zextOrTrunc(NumElts);
+ KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+ break;
+ }
case ISD::ADD:
case ISD::SUB:
case ISD::FADD:
@@ -1755,6 +1769,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
}
case ISD::TRUNCATE:
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1))
return true;
diff --git a/llvm/test/CodeGen/X86/trunc-subvector.ll b/llvm/test/CodeGen/X86/trunc-subvector.ll
index 77e67d6e554..0355241ae5f 100644
--- a/llvm/test/CodeGen/X86/trunc-subvector.ll
+++ b/llvm/test/CodeGen/X86/trunc-subvector.ll
@@ -108,6 +108,7 @@ define <2 x i32> @test5(<8 x i32> %v) {
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7],ymm1[24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,2,3]
@@ -227,6 +228,7 @@ define <2 x i32> @test10(<8 x i32> %v) {
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7],ymm1[24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,2,3]
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 45a7e55e519..614913ef7eb 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -761,7 +761,6 @@ define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE41-LABEL: splatvar_rotate_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: pslld %xmm2, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [32,32,32,32]
@@ -774,7 +773,6 @@ define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_rotate_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vpslld %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32]
; AVX1-NEXT: vpsubd %xmm1, %xmm3, %xmm1
@@ -786,7 +784,6 @@ define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX2-LABEL: splatvar_rotate_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX2-NEXT: vpslld %xmm2, %xmm0, %xmm2
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1
@@ -876,8 +873,6 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE41-LABEL: splatvar_rotate_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: psllw %xmm2, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
@@ -887,35 +882,20 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: splatvar_rotate_v8i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_rotate_v8i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX2-NEXT: vpsllw %xmm2, %xmm0, %xmm2
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_rotate_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0
+; AVX-NEXT: retq
;
; AVX512-LABEL: splatvar_rotate_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX512-NEXT: vpsllw %xmm2, %xmm0, %xmm2
; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index cbbbb8a9e3b..b7bbf66e7b5 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -602,14 +602,13 @@ define <8 x i32> @splatvar_rotate_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
;
; AVX2-LABEL: splatvar_rotate_v8i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm1, %ymm2
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm1
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpslld %xmm2, %ymm0, %ymm2
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
-; AVX2-NEXT: vpsubd %xmm2, %xmm3, %xmm2
-; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
-; AVX2-NEXT: vpsrld %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1
+; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_rotate_v8i32:
@@ -687,26 +686,24 @@ define <16 x i16> @splatvar_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
;
; AVX2-LABEL: splatvar_rotate_v16i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastw %xmm1, %ymm2
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm1
+; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpsubw %xmm2, %xmm3, %xmm2
-; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX2-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: splatvar_rotate_v16i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastw %xmm1, %ymm2
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm1
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsllw %xmm2, %ymm0, %ymm2
; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512-NEXT: vpsubw %xmm2, %xmm3, %xmm2
-; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
-; AVX512-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX512-NEXT: retq
;
; XOPAVX1-LABEL: splatvar_rotate_v16i16:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index 54adeb28a26..bb42afd1458 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -343,26 +343,24 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512BW-LABEL: splatvar_rotate_v32i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastw %xmm1, %zmm2
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpsubw %xmm2, %xmm3, %xmm2
-; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512BW-NEXT: vpsrlw %xmm2, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpbroadcastw %xmm1, %zmm2
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
-; AVX512VLBW-NEXT: vpsubw %xmm2, %xmm3, %xmm2
-; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
-; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
%splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
%splat16 = sub <32 x i16> <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>, %splat