author     Simon Pilgrim <llvm-dev@redking.me.uk>    2018-11-18 20:21:52 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>    2018-11-18 20:21:52 +0000
commit     b31bdbd2e912f3b814ebd286131c5da64bb4bff9 (patch)
tree       d298c7ad7634d653dbf8c048329830d694f69c1e /llvm
parent     11d50948e26261bc80a1c745422995d1f02aebb5 (diff)
[X86][SSE] Add SimplifyDemandedVectorElts support for SSE splat-vector-shifts.
SSE vector shifts only use the bottom 64-bits of the shift amount vector.

llvm-svn: 347173
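As a minimal sketch of the pattern this change targets (adapted from the splatvar tests in the diff below; the function name is illustrative, not part of the patch), the splatted shift amount only matters in its low 64 bits, so the splat can be simplified away before lowering to PSLLQ:

define <2 x i64> @splatvar_shl_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
  ; %b is splatted, but the SSE shift instruction only reads the low 64 bits
  ; of its amount operand, so SimplifyDemandedVectorElts can drop the splat
  ; (the pshufd/vpbroadcastq seen in the old test output).
  %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
  %shl = shl <2 x i64> %a, %splat
  ret <2 x i64> %shl
}

With an SSE2 target, llc should now emit the psllq without a preceding shuffle of the amount register, in line with the updated checks in the modified tests.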
Diffstat (limited to 'llvm')
-rw-r--r--    llvm/lib/Target/X86/X86ISelLowering.cpp       41
-rw-r--r--    llvm/test/CodeGen/X86/vector-rotate-128.ll    40
-rw-r--r--    llvm/test/CodeGen/X86/vector-rotate-256.ll     9
-rw-r--r--    llvm/test/CodeGen/X86/vshift-1.ll              1
-rw-r--r--    llvm/test/CodeGen/X86/vshift-2.ll              1
5 files changed, 59 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 43be941e1a4..2bfa4461a39 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32152,6 +32152,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// Handle special case opcodes.
switch (Opc) {
+ case X86ISD::VSHL:
+ case X86ISD::VSRL:
+ case X86ISD::VSRA: {
+ // We only need the bottom 64-bits of the (128-bit) shift amount.
+ SDValue Amt = Op.getOperand(1);
+ EVT AmtVT = Amt.getSimpleValueType();
+ assert(AmtVT.is128BitVector() && "Unexpected value type");
+ APInt AmtUndef, AmtZero;
+ int NumAmtElts = AmtVT.getVectorNumElements();
+ APInt AmtElts = APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2);
+ if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
case X86ISD::VBROADCAST: {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
@@ -35269,6 +35284,28 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ assert((X86ISD::VSHL == N->getOpcode() || X86ISD::VSRA == N->getOpcode() ||
+ X86ISD::VSRL == N->getOpcode()) &&
+ "Unexpected shift opcode");
+ EVT VT = N->getValueType(0);
+
+ // Shift zero -> zero.
+ if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+ return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+
+ APInt KnownUndef, KnownZero;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
+ KnownZero, DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -40834,6 +40871,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
case X86ISD::PACKSS:
case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget);
+ case X86ISD::VSHL:
+ case X86ISD::VSRA:
+ case X86ISD::VSRL:
+ return combineVectorShiftVar(N, DAG, DCI, Subtarget);
case X86ISD::VSHLI:
case X86ISD::VSRAI:
case X86ISD::VSRLI:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 22adf6e8a53..256da3500c1 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -664,34 +664,22 @@ define <16 x i8> @var_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
define <2 x i64> @splatvar_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-LABEL: splatvar_rotate_v2i64:
; SSE: # %bb.0:
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
-; SSE-NEXT: movdqa {{.*#+}} xmm3 = [64,64]
-; SSE-NEXT: psubq %xmm2, %xmm3
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: psllq %xmm1, %xmm2
-; SSE-NEXT: psrlq %xmm3, %xmm0
-; SSE-NEXT: por %xmm2, %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [64,64]
+; SSE-NEXT: psubq %xmm1, %xmm2
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: psllq %xmm1, %xmm3
+; SSE-NEXT: psrlq %xmm2, %xmm0
+; SSE-NEXT: por %xmm3, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: splatvar_rotate_v2i64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
-; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatvar_rotate_v2i64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %xmm2
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
-; AVX2-NEXT: vpsubq %xmm2, %xmm3, %xmm2
-; AVX2-NEXT: vpsllq %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: splatvar_rotate_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64]
+; AVX-NEXT: vpsubq %xmm1, %xmm2, %xmm2
+; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
;
; AVX512F-LABEL: splatvar_rotate_v2i64:
; AVX512F: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index cdfcb59d513..cbbbb8a9e3b 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -521,12 +521,11 @@ define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
;
; AVX2-LABEL: splatvar_rotate_v4i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm2
-; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm1
+; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [64,64]
-; AVX2-NEXT: vpsubq %xmm2, %xmm3, %xmm2
-; AVX2-NEXT: vpsrlq %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpsubq %xmm1, %xmm3, %xmm1
+; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: splatvar_rotate_v4i64:
diff --git a/llvm/test/CodeGen/X86/vshift-1.ll b/llvm/test/CodeGen/X86/vshift-1.ll
index a2e1e7a641c..a5b2c35418c 100644
--- a/llvm/test/CodeGen/X86/vshift-1.ll
+++ b/llvm/test/CodeGen/X86/vshift-1.ll
@@ -29,7 +29,6 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; X32-NEXT: psllq %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/vshift-2.ll b/llvm/test/CodeGen/X86/vshift-2.ll
index 6b01a8acdf4..8e57551d1e2 100644
--- a/llvm/test/CodeGen/X86/vshift-2.ll
+++ b/llvm/test/CodeGen/X86/vshift-2.ll
@@ -29,7 +29,6 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; X32-NEXT: psrlq %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl