| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-08-28 10:37:29 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-08-28 10:37:29 +0000 |
| commit | af98587095de42183328a3a575bb759b88c0808d (patch) | |
| tree | c8017150e8b53649091cb8a14a6035a8306a26f4 /llvm/lib/Target | |
| parent | bb6aa92c311cbd5b290604125e7dc145974456fd (diff) | |
| download | bcm5719-llvm-af98587095de42183328a3a575bb759b88c0808d.tar.gz bcm5719-llvm-af98587095de42183328a3a575bb759b88c0808d.zip | |
[X86][SSE] Improve variable scalar shift of vXi8 vectors (PR34694)
For vXi8 vectors shifted by a uniform (splat) amount, this patch creates both the shift mask and the actual shift using the vXi16 vector shift ops, then masks away the bits that cross byte boundaries.
Differential Revision: https://reviews.llvm.org/D51263
llvm-svn: 340813
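The key observation is easiest to check on a single 16-bit lane holding two adjacent bytes: a logical vXi16 shift produces the right bits in every byte except for the bits that leak across the byte boundary, and those are exactly the bits cleared by a splatted `0xFF >> amt` byte mask (`0xFF << amt` for left shifts). A minimal scalar sketch of that reasoning follows; the helper name and test values are illustrative, not from the commit:

```cpp
#include <cassert>
#include <cstdint>

// Model one little-endian 16-bit lane holding two i8 elements ('lo', 'hi').
// A logical right shift of the whole lane, followed by a per-byte mask of
// (0xFF >> amt), reproduces an independent per-byte logical right shift.
static uint8_t lshr_byte_via_i16(uint8_t lo, uint8_t hi, unsigned amt,
                                 bool want_hi) {
  uint16_t lane = (uint16_t)((hi << 8) | lo);
  uint16_t shifted = lane >> amt;             // the vXi16 shift
  uint8_t mask = (uint8_t)(0xFF >> amt);      // clears bits leaked from 'hi' into 'lo'
  uint8_t b = want_hi ? (uint8_t)(shifted >> 8) : (uint8_t)shifted;
  return (uint8_t)(b & mask);
}

int main() {
  // lshr of each byte by 3: 0xF0 >> 3 == 0x1E, 0x81 >> 3 == 0x10.
  assert(lshr_byte_via_i16(0xF0, 0x81, 3, /*want_hi=*/false) == 0x1E);
  assert(lshr_byte_via_i16(0xF0, 0x81, 3, /*want_hi=*/true) == 0x10);
  return 0;
}
```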
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 71 |
1 file changed, 58 insertions, 13 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 63dd407a0bb..97e64b57681 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20445,23 +20445,24 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
   // Need to build a vector containing shift amount.
   // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
-  // +=================+============+=======================================+
-  // | ShAmt is        | HasSSE4.1? | Construct ShAmt vector as             |
-  // +=================+============+=======================================+
-  // | i64             | Yes, No    | Use ShAmt as lowest elt               |
-  // | i32             | Yes        | zero-extend in-reg                    |
-  // | (i32 zext(i16)) | Yes        | zero-extend in-reg                    |
-  // | (i32 zext(i16)) | No         | byte-shift-in-reg                     |
-  // | i16/i32         | No         | v4i32 build_vector(ShAmt, 0, ud, ud)) |
-  // +=================+============+=======================================+
+  // +====================+============+=======================================+
+  // | ShAmt is           | HasSSE4.1? | Construct ShAmt vector as             |
+  // +====================+============+=======================================+
+  // | i64                | Yes, No    | Use ShAmt as lowest elt               |
+  // | i32                | Yes        | zero-extend in-reg                    |
+  // | (i32 zext(i16/i8)) | Yes        | zero-extend in-reg                    |
+  // | (i32 zext(i16/i8)) | No         | byte-shift-in-reg                     |
+  // | i16/i32            | No         | v4i32 build_vector(ShAmt, 0, ud, ud)) |
+  // +====================+============+=======================================+
 
   if (SVT == MVT::i64)
     ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
   else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
            ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-           ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
+           (ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 ||
+            ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) {
     ShAmt = ShAmt.getOperand(0);
-    MVT AmtTy = MVT::v8i16;
+    MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
     ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt);
     if (Subtarget.hasSSE41())
       ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
@@ -23467,8 +23468,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
 
   Amt = peekThroughEXTRACT_SUBVECTORs(Amt);
 
-  if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
-    if (SDValue BaseShAmt = IsSplatValue(VT, Amt, dl, DAG, Subtarget, Opcode)) {
+  if (SDValue BaseShAmt = IsSplatValue(VT, Amt, dl, DAG, Subtarget, Opcode)) {
+    if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
       MVT EltVT = VT.getVectorElementType();
       assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
       if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
@@ -23478,6 +23479,50 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
       return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget,
                                  DAG);
     }
+
+    // vXi8 shifts - shift as v8i16 + mask result.
+    if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
+         (VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||
+         VT == MVT::v64i8) &&
+        !Subtarget.hasXOP()) {
+      unsigned NumElts = VT.getVectorNumElements();
+      MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+      if (SupportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
+        unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL);
+        unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false);
+        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
+
+        // Create the mask using vXi16 shifts. For shift-rights we need to move
+        // the upper byte down before splatting the vXi8 mask.
+        SDValue BitMask = DAG.getConstant(-1, dl, ExtVT);
+        BitMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, BitMask,
+                                      BaseShAmt, Subtarget, DAG);
+        if (Opcode != ISD::SHL)
+          BitMask = getTargetVShiftByConstNode(LogicalX86Op, dl, ExtVT, BitMask,
+                                               8, DAG);
+        BitMask = DAG.getBitcast(VT, BitMask);
+        BitMask = DAG.getVectorShuffle(VT, dl, BitMask, BitMask,
+                                       SmallVector<int, 64>(NumElts, 0));
+
+        SDValue Res = getTargetVShiftNode(LogicalX86Op, dl, ExtVT,
+                                          DAG.getBitcast(ExtVT, R), BaseShAmt,
+                                          Subtarget, DAG);
+        Res = DAG.getBitcast(VT, Res);
+        Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask);
+
+        if (Opcode == ISD::SRA) {
+          // ashr(R, Amt) === sub(xor(lshr(R, Amt), SignMask), SignMask)
+          // SignMask = lshr(SignBit, Amt) - safe to do this with PSRLW.
+          SDValue SignMask = DAG.getConstant(0x8080, dl, ExtVT);
+          SignMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask,
+                                         BaseShAmt, Subtarget, DAG);
+          SignMask = DAG.getBitcast(VT, SignMask);
+          Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask);
+          Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask);
+        }
+        return Res;
+      }
+    }
   }
 
   // Check cases (mainly 32-bit) where i64 is expanded into high and low parts.
```
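At the intrinsics level, the same lowering for a uniform right shift of 16 bytes looks roughly like the sketch below. This is a hedged standalone example rather than the DAG code above: the function names are made up, it assumes `0 <= amt <= 7`, and the mask splat uses a shift/OR instead of the byte shuffle the patch emits.

```cpp
#include <emmintrin.h>  // SSE2

// Uniform logical right shift of 16 x i8 by the same amount (0..7),
// performed as 8 x i16 shifts plus a byte mask (illustrative sketch).
static __m128i srl_epi8_uniform(__m128i v, unsigned amt) {
  __m128i amt128 = _mm_cvtsi32_si128((int)amt);

  // Build the per-byte mask with the same v8i16 shift: 0xFFFF >> amt, then
  // move the clean upper byte down and broadcast it into both byte lanes.
  __m128i mask = _mm_srl_epi16(_mm_set1_epi16(-1), amt128);
  mask = _mm_srli_epi16(mask, 8);                      // 0x00MM per word
  mask = _mm_or_si128(mask, _mm_slli_epi16(mask, 8));  // 0xMMMM per word

  // The actual shift on 16-bit lanes; the mask removes the bits each byte
  // received from its higher neighbour.
  return _mm_and_si128(_mm_srl_epi16(v, amt128), mask);
}

// Arithmetic variant: ashr(x, amt) == sub(xor(lshr(x, amt), SignMask), SignMask),
// where SignMask is the per-byte sign bit shifted down by the same amount.
static __m128i sra_epi8_uniform(__m128i v, unsigned amt) {
  __m128i amt128 = _mm_cvtsi32_si128((int)amt);
  __m128i res = srl_epi8_uniform(v, amt);
  __m128i sign = _mm_srl_epi16(_mm_set1_epi16((short)0x8080), amt128);
  return _mm_sub_epi8(_mm_xor_si128(res, sign), sign);
}
```

The arithmetic variant relies on the same identity quoted in the patch's comment, `ashr(R, Amt) == sub(xor(lshr(R, Amt), SignMask), SignMask)` with `SignMask = lshr(0x80 per byte, Amt)`; PSRLW can compute that mask safely because the sign bit never crosses a byte boundary for shift amounts up to 7.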

