summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-03-25 19:50:14 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-03-25 19:50:14 +0000
commit5400a4d0af2d47db9cbb538401bc21bb41793b9e (patch)
tree272391344d44cb1cd75f9a6559780b7fd410a8a4 /llvm
parent1e6fedbb83585d6ed41c46d84b2727665d85f911 (diff)
downloadbcm5719-llvm-5400a4d0af2d47db9cbb538401bc21bb41793b9e.tar.gz
bcm5719-llvm-5400a4d0af2d47db9cbb538401bc21bb41793b9e.zip
[X86][SSE] Generalised CMP+AND1 combine to ZERO/ALLBITS+MASK
Patch to generalize combinePCMPAnd1 (for handling SETCC + ZEXT cases) to work for any input that has zero/all bits set masked with an 'all low bits' mask. Replaced the implicit assumption of shift availability with a call to SupportedVectorShiftWithImm. Part 1 of 3. Differential Revision: https://reviews.llvm.org/D31347 llvm-svn: 298779
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp48
1 files changed, 22 insertions, 26 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dc6bc115a16..47f20a2b456 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21690,15 +21690,15 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
if (VT.getScalarSizeInBits() < 16)
return false;
- if (VT.is512BitVector() &&
+ if (VT.is512BitVector() && Subtarget.hasAVX512() &&
(VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI()))
return true;
- bool LShift = VT.is128BitVector() ||
- (VT.is256BitVector() && Subtarget.hasInt256());
+ bool LShift = (VT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (VT.is256BitVector() && Subtarget.hasInt256());
bool AShift = LShift && (Subtarget.hasAVX512() ||
- (VT != MVT::v2i64 && VT != MVT::v4i64));
+ (VT != MVT::v2i64 && VT != MVT::v4i64));
return (Opcode == ISD::SRA) ? AShift : LShift;
}
@@ -31383,38 +31383,34 @@ static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// If this is a PCMPEQ or PCMPGT result that is bitwise-anded with 1 (this is
-/// the x86 lowering of a SETCC + ZEXT), replace the 'and' with a shift-right to
-/// eliminate loading the vector constant mask value. This relies on the fact
-/// that a PCMP always creates an all-ones or all-zeros bitmask per element.
-static SDValue combinePCMPAnd1(SDNode *N, SelectionDAG &DAG) {
+/// If this is a zero/all-bits result that is bitwise-anded with a low bits
+/// mask. (Mask == 1 for the x86 lowering of a SETCC + ZEXT), replace the 'and'
+/// with a shift-right to eliminate loading the vector constant mask value.
+static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
+ EVT VT0 = Op0.getValueType();
+ EVT VT1 = Op1.getValueType();
- // TODO: Use AssertSext to mark any nodes that have the property of producing
- // all-ones or all-zeros. Then check for that node rather than particular
- // opcodes.
- if (Op0.getOpcode() != X86ISD::PCMPEQ && Op0.getOpcode() != X86ISD::PCMPGT)
+ if (VT0 != VT1 || !VT0.isSimple() || !VT0.isInteger())
return SDValue();
- // The existence of the PCMP node guarantees that we have the required SSE2 or
- // AVX2 for a shift of this vector type, but there is no vector shift by
- // immediate for a vector with byte elements (PSRLB). 512-bit vectors use the
- // masked compare nodes, so they should not make it here.
- EVT VT0 = Op0.getValueType();
- EVT VT1 = Op1.getValueType();
- unsigned EltBitWidth = VT0.getScalarSizeInBits();
- if (VT0 != VT1 || EltBitWidth == 8)
+ APInt SplatVal;
+ if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
+ !APIntOps::isMask(SplatVal))
return SDValue();
- assert(VT0.getSizeInBits() == 128 || VT0.getSizeInBits() == 256);
+ if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL))
+ return SDValue();
- APInt SplatVal;
- if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) || SplatVal != 1)
+ unsigned EltBitWidth = VT0.getScalarSizeInBits();
+ if (EltBitWidth != DAG.ComputeNumSignBits(Op0))
return SDValue();
SDLoc DL(N);
- SDValue ShAmt = DAG.getConstant(EltBitWidth - 1, DL, MVT::i8);
+ unsigned ShiftVal = SplatVal.countTrailingOnes();
+ SDValue ShAmt = DAG.getConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt);
return DAG.getBitcast(N->getValueType(0), Shift);
}
@@ -31434,7 +31430,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG))
return R;
- if (SDValue ShiftRight = combinePCMPAnd1(N, DAG))
+ if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
return ShiftRight;
EVT VT = N->getValueType(0);
OpenPOWER on IntegriCloud