summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2018-12-04 16:52:32 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2018-12-04 16:52:32 +0000
commit07843640d5824e007cfc40da70ed65007410e30e (patch)
treeb5cb1b55e174c415096c31d064bf404014b96e46
parent32a728298cbffd2196208c0003df985594894428 (diff)
downloadbcm5719-llvm-07843640d5824e007cfc40da70ed65007410e30e.tar.gz
bcm5719-llvm-07843640d5824e007cfc40da70ed65007410e30e.zip
[X86][SSE] Add SimplifyDemandedBitsForTargetNode handling for MOVMSK
Moves existing SimplifyDemandedBits call out of combineMOVMSK and add SimplifyDemandedVectorElts call based on the sign bits we need. llvm-svn: 348282
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp45
-rw-r--r--llvm/test/CodeGen/X86/movmsk.ll3
2 files changed, 40 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bd069534901..436ee4c4205 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32466,6 +32466,39 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
}
break;
}
+ case X86ISD::MOVMSK: {
+ SDValue Src = Op.getOperand(0);
+ MVT VT = Op.getSimpleValueType();
+ MVT SrcVT = Src.getSimpleValueType();
+ unsigned SrcBits = SrcVT.getScalarSizeInBits();
+ unsigned NumElts = SrcVT.getVectorNumElements();
+
+ // If we don't need the sign bits at all just return zero.
+ if (OriginalDemandedBits.countTrailingZeros() >= NumElts)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+
+ // Only demand the vector elements of the sign bits we need.
+ APInt KnownUndef, KnownZero;
+ APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
+ if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
+ TLO, Depth + 1))
+ return true;
+
+ Known.Zero = KnownZero.zextOrSelf(BitWidth);
+ Known.Zero.setHighBits(BitWidth - NumElts);
+
+ // MOVMSK only uses the MSB from each vector element.
+ KnownBits KnownSrc;
+ if (SimplifyDemandedBits(Src, APInt::getSignMask(SrcBits), KnownSrc, TLO,
+ Depth + 1))
+ return true;
+
+ if (KnownSrc.One[SrcBits - 1])
+ Known.One.setLowBits(NumElts);
+ else if (KnownSrc.Zero[SrcBits - 1])
+ Known.Zero.setLowBits(NumElts);
+ return false;
+ }
}
return TargetLowering::SimplifyDemandedBitsForTargetNode(
@@ -39566,10 +39599,11 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue Src = N->getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
+ MVT VT = N->getSimpleValueType(0);
// Perform constant folding.
if (ISD::isBuildVectorOfConstantSDNodes(Src.getNode())) {
- assert(N->getValueType(0) == MVT::i32 && "Unexpected result type");
+ assert(VT== MVT::i32 && "Unexpected result type");
APInt Imm(32, 0);
for (unsigned Idx = 0, e = Src.getNumOperands(); Idx < e; ++Idx) {
SDValue In = Src.getOperand(Idx);
@@ -39577,7 +39611,7 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
cast<ConstantSDNode>(In)->getAPIntValue().isNegative())
Imm.setBit(Idx);
}
- return DAG.getConstant(Imm, SDLoc(N), N->getValueType(0));
+ return DAG.getConstant(Imm, SDLoc(N), VT);
}
// Look through int->fp bitcasts that don't change the element width.
@@ -39587,11 +39621,10 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
EVT(SrcVT).changeVectorElementTypeToInteger())
Src = Src.getOperand(0);
+ // Simplify the inputs.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
- // MOVMSK only uses the MSB from each vector element.
- APInt DemandedMask(APInt::getSignMask(SrcVT.getScalarSizeInBits()));
- if (TLI.SimplifyDemandedBits(Src, DemandedMask, DCI))
+ APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits()));
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
// Combine (movmsk (setne (and X, (1 << C)), 0)) -> (movmsk (X << C)).
diff --git a/llvm/test/CodeGen/X86/movmsk.ll b/llvm/test/CodeGen/X86/movmsk.ll
index 623f92b4342..eebcf181196 100644
--- a/llvm/test/CodeGen/X86/movmsk.ll
+++ b/llvm/test/CodeGen/X86/movmsk.ll
@@ -134,11 +134,10 @@ define i32 @demandedbits_v16i8(<16 x i8> %x) {
ret i32 %2
}
-; TODO: Simplify demanded vector elts
+; Simplify demanded vector elts
define i32 @demandedelts_v4f32(<4 x float> %x) {
; CHECK-LABEL: demandedelts_v4f32:
; CHECK: ## %bb.0:
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: movmskps %xmm0, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
OpenPOWER on IntegriCloud