author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-02-12 23:42:28 +0000 |
---|---|---|
committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-02-12 23:42:28 +0000 |
commit | 386d566395d90b01ddddf3d0ab219097cca8c37c (patch) | |
tree | f53452ea49b41ace09a4e3a5ace515f7f9b49f87 /llvm/lib | |
parent | fdde7ccf376c0269929541573650df5075f201b1 (diff) | |
[X86] Teach the backend how to lower vector shift left into multiply rather than scalarizing it.
Instead of expanding a packed shift into a sequence of scalar shifts,
the backend now tries (when possible) to convert the vector shift into a
vector multiply.
Before this change, a shift of an MVT::v8i16 vector by a build_vector of constants
was always scalarized into a long sequence of "vector extracts + scalar shifts +
vector inserts".
With this change, when SSE2 is available, we emit a single vector multiply, as in the sketch below.
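For illustration, a function like the following (a hypothetical .ll snippet, not the actual contents of the new test) used to be scalarized; with SSE2 it is now expected to lower to a single pmullw by the constants <2, 2, 4, 8, 128, 1, 512, 2048>:

```llvm
; Hypothetical example: a v8i16 shift left by a constant build_vector.
define <8 x i16> @shl_const_v8i16(<8 x i16> %a) {
  ; each lane is shifted by its own constant amount
  %shl = shl <8 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
  ret <8 x i16> %shl
}
```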
This change also affects SSE4.1, AVX, and AVX2 shifts (see the sketch after this list):
- A shift of an MVT::v4i32 vector by a build_vector of non-uniform constants
is now lowered, when possible, into a single SSE4.1 vector multiply.
- Packed v16i16 shifts left by a constant build_vector are now expanded, when
possible, into a single AVX2 vpmullw.
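The sketch below covers both cases (hypothetical functions, not the actual test contents): with SSE4.1 the v4i32 shift is expected to become a single vector multiply by <8, 4, 2, 2>, and with AVX2 the v16i16 shift a single vpmullw:

```llvm
; Hypothetical v4i32 case: non-uniform constant shift amounts.
define <4 x i32> @shl_const_v4i32(<4 x i32> %a) {
  %shl = shl <4 x i32> %a, <i32 3, i32 2, i32 1, i32 1>
  ret <4 x i32> %shl
}

; Hypothetical v16i16 case: constant build_vector of shift amounts.
define <16 x i16> @shl_const_v16i16(<16 x i16> %a) {
  %shl = shl <16 x i16> %a, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8,
                             i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  ret <16 x i16> %shl
}
```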
This change also improves the lowering of AVX512F vector shifts.
Added test CodeGen/X86/vec_shift6.ll with some code examples that are affected
by this change.
llvm-svn: 201271
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 33 |
1 file changed, 33 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 85656d80914..4ce2ea36c00 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13156,6 +13156,39 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
       return Op;
   }
 
+  // If possible, lower this packed shift into a vector multiply instead of
+  // expanding it into a sequence of scalar shifts.
+  // Do this only if the vector shift count is a constant build_vector.
+  if (Op.getOpcode() == ISD::SHL &&
+      (VT == MVT::v8i16 || VT == MVT::v4i32 ||
+      (Subtarget->hasInt256() && VT == MVT::v16i16)) &&
+      ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
+    SmallVector<SDValue, 8> Elts;
+    EVT SVT = VT.getScalarType();
+    unsigned SVTBits = SVT.getSizeInBits();
+    const APInt &One = APInt(SVTBits, 1);
+    unsigned NumElems = VT.getVectorNumElements();
+
+    for (unsigned i=0; i !=NumElems; ++i) {
+      SDValue Op = Amt->getOperand(i);
+      if (Op->getOpcode() == ISD::UNDEF) {
+        Elts.push_back(Op);
+        continue;
+      }
+
+      ConstantSDNode *ND = cast<ConstantSDNode>(Op);
+      const APInt &C = APInt(SVTBits, ND->getAPIntValue().getZExtValue());
+      uint64_t ShAmt = C.getZExtValue();
+      if (ShAmt >= SVTBits) {
+        Elts.push_back(DAG.getUNDEF(SVT));
+        continue;
+      }
+      Elts.push_back(DAG.getConstant(One.shl(ShAmt), SVT));
+    }
+    SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Elts[0], NumElems);
+    return DAG.getNode(ISD::MUL, dl, VT, R, BV);
+  }
+
   // Lower SHL with variable shift amount.
   if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
     Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
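For reference, the rewrite performed by the added block can be restated at the IR level as turning each constant shift amount into the matching power-of-two multiplier (shift amounts that are undef, or not smaller than the element width, become undef multiplier lanes). The two hypothetical functions below are therefore expected to lower to the same vector multiply; this is an illustrative sketch, not part of the patch or of vec_shift6.ll:

```llvm
; Illustrative equivalence: shifting each lane by <2, 5, 3, 0> is the same
; as multiplying it by <4, 32, 8, 1>, so with SSE4.1 both functions can be
; lowered to a single packed multiply.
define <4 x i32> @shl_form(<4 x i32> %x) {
  %r = shl <4 x i32> %x, <i32 2, i32 5, i32 3, i32 0>
  ret <4 x i32> %r
}

define <4 x i32> @mul_form(<4 x i32> %x) {
  %r = mul <4 x i32> %x, <i32 4, i32 32, i32 8, i32 1>
  ret <4 x i32> %r
}
```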