author      Simon Pilgrim <llvm-dev@redking.me.uk>    2018-08-28 10:37:29 +0000
committer   Simon Pilgrim <llvm-dev@redking.me.uk>    2018-08-28 10:37:29 +0000
commit      af98587095de42183328a3a575bb759b88c0808d (patch)
tree        c8017150e8b53649091cb8a14a6035a8306a26f4 /llvm/lib/Target
parent      bb6aa92c311cbd5b290604125e7dc145974456fd (diff)
[X86][SSE] Improve variable scalar shift of vXi8 vectors (PR34694)
This patch creates the shift mask and actual shift using the vXi16 vector shift ops.

Differential Revision: https://reviews.llvm.org/D51263

llvm-svn: 340813
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp  71
1 file changed, 58 insertions, 13 deletions
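
The commit lowers uniform-amount vXi8 shifts by shifting the vXi16 lanes and then masking off the bits that leak across the two bytes of each 16-bit lane, with an xor/sub sign fixup for arithmetic shifts. As a rough illustration, the sketch below models the same steps per byte in plain C++; the helper names are made up for this sketch (they are not LLVM API) and the printed values are just spot checks, not anything from the patch itself.

#include <cstdint>
#include <cstdio>

// Scalar model of the vXi8-via-vXi16 lowering; each byte is treated as one
// half of an i16 lane, shifted with a 16-bit shift, then masked as the patch does.
static uint8_t shl_i8_via_i16(uint8_t X, unsigned Amt) {
  uint16_t Wide = uint16_t(uint16_t(X) << Amt);            // vXi16 shift (PSLLW)
  uint8_t Mask = uint8_t(uint16_t(0xFFFF) << Amt);         // low byte of (-1 << Amt), splatted in the patch
  return uint8_t(Wide) & Mask;                             // clear bits that leaked from the neighbouring byte
}

static uint8_t lshr_i8_via_i16(uint8_t X, unsigned Amt) {
  uint16_t Wide = uint16_t(X) >> Amt;                      // vXi16 shift (PSRLW)
  uint8_t Mask = uint8_t((uint16_t(0xFFFF) >> Amt) >> 8);  // move the upper byte down before splatting
  return uint8_t(Wide) & Mask;
}

static int8_t ashr_i8_via_i16(int8_t X, unsigned Amt) {
  uint8_t Logical = lshr_i8_via_i16(uint8_t(X), Amt);
  uint8_t SignMask = uint8_t(0x80) >> Amt;                 // lshr(SignBit, Amt), per byte of the 0x8080 constant
  return int8_t(uint8_t((Logical ^ SignMask) - SignMask)); // xor/sub sign-extends the shifted value
}

int main() {
  std::printf("%#x %#x %d\n",
              unsigned(shl_i8_via_i16(0x12, 3)),           // 0x90
              unsigned(lshr_i8_via_i16(0xF0, 2)),          // 0x3c
              int(ashr_i8_via_i16(int8_t(-96), 2)));       // -24
  return 0;
}
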
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 63dd407a0bb..97e64b57681 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20445,23 +20445,24 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
// Need to build a vector containing shift amount.
// SSE/AVX packed shifts only use the lower 64-bit of the shift count.
- // +=================+============+=======================================+
- // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
- // +=================+============+=======================================+
- // | i64 | Yes, No | Use ShAmt as lowest elt |
- // | i32 | Yes | zero-extend in-reg |
- // | (i32 zext(i16)) | Yes | zero-extend in-reg |
- // | (i32 zext(i16)) | No | byte-shift-in-reg |
- // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) |
- // +=================+============+=======================================+
+ // +====================+============+=======================================+
+ // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
+ // +====================+============+=======================================+
+ // | i64 | Yes, No | Use ShAmt as lowest elt |
+ // | i32 | Yes | zero-extend in-reg |
+ // | (i32 zext(i16/i8)) | Yes | zero-extend in-reg |
+ // | (i32 zext(i16/i8)) | No | byte-shift-in-reg |
+ // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) |
+ // +====================+============+=======================================+
if (SVT == MVT::i64)
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
+ (ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 ||
+ ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) {
ShAmt = ShAmt.getOperand(0);
- MVT AmtTy = MVT::v8i16;
+ MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt);
if (Subtarget.hasSSE41())
ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
@@ -23467,8 +23468,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
Amt = peekThroughEXTRACT_SUBVECTORs(Amt);
- if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
- if (SDValue BaseShAmt = IsSplatValue(VT, Amt, dl, DAG, Subtarget, Opcode)) {
+ if (SDValue BaseShAmt = IsSplatValue(VT, Amt, dl, DAG, Subtarget, Opcode)) {
+ if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
MVT EltVT = VT.getVectorElementType();
assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
@@ -23478,6 +23479,50 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
}
+
+ // vXi8 shifts - shift as v8i16 + mask result.
+ if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
+ (VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||
+ VT == MVT::v64i8) &&
+ !Subtarget.hasXOP()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+ if (SupportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
+ unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL);
+ unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false);
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
+
+ // Create the mask using vXi16 shifts. For shift-rights we need to move
+ // the upper byte down before splatting the vXi8 mask.
+ SDValue BitMask = DAG.getConstant(-1, dl, ExtVT);
+ BitMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, BitMask,
+ BaseShAmt, Subtarget, DAG);
+ if (Opcode != ISD::SHL)
+ BitMask = getTargetVShiftByConstNode(LogicalX86Op, dl, ExtVT, BitMask,
+ 8, DAG);
+ BitMask = DAG.getBitcast(VT, BitMask);
+ BitMask = DAG.getVectorShuffle(VT, dl, BitMask, BitMask,
+ SmallVector<int, 64>(NumElts, 0));
+
+ SDValue Res = getTargetVShiftNode(LogicalX86Op, dl, ExtVT,
+ DAG.getBitcast(ExtVT, R), BaseShAmt,
+ Subtarget, DAG);
+ Res = DAG.getBitcast(VT, Res);
+ Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask);
+
+ if (Opcode == ISD::SRA) {
+ // ashr(R, Amt) === sub(xor(lshr(R, Amt), SignMask), SignMask)
+ // SignMask = lshr(SignBit, Amt) - safe to do this with PSRLW.
+ SDValue SignMask = DAG.getConstant(0x8080, dl, ExtVT);
+ SignMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask,
+ BaseShAmt, Subtarget, DAG);
+ SignMask = DAG.getBitcast(VT, SignMask);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask);
+ }
+ return Res;
+ }
+ }
}
// Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
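
The SRA block above relies on the identity ashr(R, Amt) === sub(xor(lshr(R, Amt), SignMask), SignMask) with SignMask = lshr(SignBit, Amt). A minimal standalone check of that identity over all i8 values, assuming '>>' on a negative signed value is an arithmetic shift for the reference result (true on mainstream compilers and guaranteed since C++20):

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned Amt = 0; Amt < 8; ++Amt) {
    uint8_t SignMask = uint8_t(0x80) >> Amt;                          // lshr(SignBit, Amt)
    for (int V = -128; V < 128; ++V) {
      uint8_t Logical = uint8_t(uint8_t(V) >> Amt);                   // lshr(R, Amt)
      int8_t ViaTrick = int8_t(uint8_t((Logical ^ SignMask) - SignMask));
      int8_t Reference = int8_t(int8_t(V) >> Amt);                    // arithmetic shift reference
      assert(ViaTrick == Reference);
    }
  }
  std::printf("SRA identity holds for all i8 values and shift amounts 0-7\n");
  return 0;
}
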