summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-10-03 09:41:00 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-10-03 09:41:00 +0000
commit: 19d535e75bc13de7e0f8dd51124de6cc1114332d (patch)
tree: 3627648a1e1026621ebd144022148224d4998abb /llvm/lib/Target/X86/X86ISelLowering.cpp
parent: e485b143ead5988e0051f1babe4600700923d3b3 (diff)
download: bcm5719-llvm-19d535e75bc13de7e0f8dd51124de6cc1114332d.tar.gz
download: bcm5719-llvm-19d535e75bc13de7e0f8dd51124de6cc1114332d.zip
[X86][SSE] Add support for PACKSS/PACKUS constant folding
Pulled out of D38472.
llvm-svn: 314776
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 85
1 files changed, 85 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0981d39fe5c..3dd4d74ca40 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5350,6 +5350,13 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
return false;
};
+ // Handle UNDEFs.
+ if (Op.isUndef()) {
+ APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
+ SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
+ return CastBitData(UndefSrcElts, SrcEltBits);
+ }
+
// Extract constant bits from build vector.
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
@@ -31838,6 +31845,82 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
return SDValue();
}
+/// Combine an X86ISD::PACKSS / X86ISD::PACKUS node.
+///
+/// The only transform performed here is constant folding: if both operands
+/// are either undef or used solely by \p N, and
+/// getTargetConstantBitsFromNode can extract their per-element bits, the
+/// pack is evaluated at compile time and the result of getConstVector is
+/// returned in its place.
+///
+/// \param N         The PACKSS/PACKUS node (asserted below).
+/// \param DAG       Selection DAG passed to getConstVector for the result.
+/// \param DCI       Not used by the current implementation.
+/// \param Subtarget Not used by the current implementation.
+/// \returns The folded vector, or an empty SDValue if nothing was folded.
+static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const X86Subtarget &Subtarget) {
+  unsigned Opcode = N->getOpcode();
+  assert((X86ISD::PACKSS == Opcode || X86ISD::PACKUS == Opcode) &&
+         "Unexpected pack opcode");
+
+  EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  unsigned DstBitsPerElt = VT.getScalarSizeInBits();
+  // Each source element is twice as wide as a destination element.
+  unsigned SrcBitsPerElt = 2 * DstBitsPerElt;
+  assert(N0.getScalarValueSizeInBits() == SrcBitsPerElt &&
+         N1.getScalarValueSizeInBits() == SrcBitsPerElt &&
+         "Unexpected PACKSS/PACKUS input type");
+
+  // Constant Folding.
+  APInt UndefElts0, UndefElts1;
+  SmallVector<APInt, 32> EltBits0, EltBits1;
+  if ((N0->isUndef() || N->isOnlyUserOf(N0.getNode())) &&
+      (N1->isUndef() || N->isOnlyUserOf(N1.getNode())) &&
+      getTargetConstantBitsFromNode(N0, SrcBitsPerElt, UndefElts0, EltBits0) &&
+      getTargetConstantBitsFromNode(N1, SrcBitsPerElt, UndefElts1, EltBits1)) {
+    // The pack is evaluated independently for every 128-bit lane.
+    unsigned NumLanes = VT.getSizeInBits() / 128;
+    unsigned NumDstElts = VT.getVectorNumElements();
+    unsigned NumSrcElts = NumDstElts / 2;
+    unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
+    unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
+    bool IsSigned = (X86ISD::PACKSS == Opcode);
+
+    APInt Undefs(NumDstElts, 0);
+    SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
+    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+      for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
+        // Within a lane, the low half of the result is taken from operand 0
+        // and the high half from operand 1.
+        unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
+        unsigned DstIdx = Lane * NumDstEltsPerLane + Elt;
+        bool FromOp1 = Elt >= NumSrcEltsPerLane;
+        const auto &UndefElts = FromOp1 ? UndefElts1 : UndefElts0;
+        const auto &EltBits = FromOp1 ? EltBits1 : EltBits0;
+
+        // An undef source element yields an undef result element.
+        if (UndefElts[SrcIdx]) {
+          Undefs.setBit(DstIdx);
+          continue;
+        }
+
+        // Read the source through a const reference and write the saturated
+        // value straight into the result, leaving the extracted source bits
+        // untouched.
+        const APInt &Val = EltBits[SrcIdx];
+        APInt &Dst = Bits[DstIdx];
+        if (IsSigned) {
+          // PACKSS: Truncate signed value with signed saturation.
+          // Source values less than dst minint are saturated to minint.
+          // Source values greater than dst maxint are saturated to maxint.
+          if (Val.isSignedIntN(DstBitsPerElt))
+            Dst = Val.trunc(DstBitsPerElt);
+          else if (Val.isNegative())
+            Dst = APInt::getSignedMinValue(DstBitsPerElt);
+          else
+            Dst = APInt::getSignedMaxValue(DstBitsPerElt);
+        } else {
+          // PACKUS: Truncate signed value with unsigned saturation.
+          // Source values less than zero are saturated to zero.
+          // Source values greater than dst maxuint are saturated to maxuint.
+          if (Val.isIntN(DstBitsPerElt))
+            Dst = Val.trunc(DstBitsPerElt);
+          else if (Val.isNegative())
+            Dst = APInt::getNullValue(DstBitsPerElt);
+          else
+            Dst = APInt::getAllOnesValue(DstBitsPerElt);
+        }
+      }
+    }
+
+    return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
+  }
+
+  return SDValue();
+}
+
static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -36069,6 +36152,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
+ case X86ISD::PACKSS:
+ case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget);
case X86ISD::VSHLI:
case X86ISD::VSRAI:
case X86ISD::VSRLI:
OpenPOWER on IntegriCloud