diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-07-31 18:05:56 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-07-31 18:05:56 +0000 |
| commit | 5d9b00d15b3d449e8e2eadd5fa7d5d986970418f (patch) | |
| tree | dbf4ac4afe1a01f70752387d5fd8c20738ab559d /llvm/lib | |
| parent | 7f97570e79fffc25e204617714be2355f4af4c47 (diff) | |
| download | bcm5719-llvm-5d9b00d15b3d449e8e2eadd5fa7d5d986970418f.tar.gz bcm5719-llvm-5d9b00d15b3d449e8e2eadd5fa7d5d986970418f.zip | |
[X86][SSE] Use ISD::MULHU for constant/non-zero ISD::SRL lowering (PR38151)
As was done for vector rotations, we can efficiently use ISD::MULHU for vXi8/vXi16 ISD::SRL lowering.
Shift-by-zero cases are still problematic (mainly on v32i8 due to extra AND/ANDN/OR or VPBLENDVB blend masks, but v8i16/v16i16 aren't great either if PBLENDW fails), so I've limited this first patch to known non-zero cases if we can't easily use PBLENDW.
Differential Revision: https://reviews.llvm.org/D49562
llvm-svn: 338407
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 18 |
1 file changed, 18 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f7dbf3c4238..602e3a648c7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23511,6 +23511,24 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG)) return DAG.getNode(ISD::MUL, dl, VT, R, Scale); + // Constant ISD::SRL can be performed efficiently on vXi8/vXi16 vectors as we + // can replace with ISD::MULHU, creating scale factor from (NumEltBits - Amt). + // TODO: Improve support for the shift by zero special case. + if (Op.getOpcode() == ISD::SRL && ConstantAmt && + ((Subtarget.hasSSE41() && VT == MVT::v8i16) || + DAG.isKnownNeverZero(Amt)) && + (VT == MVT::v16i8 || VT == MVT::v8i16 || + ((VT == MVT::v32i8 || VT == MVT::v16i16) && Subtarget.hasInt256()))) { + SDValue EltBits = DAG.getConstant(VT.getScalarSizeInBits(), dl, VT); + SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt); + if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) { + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue ZAmt = DAG.getSetCC(dl, VT, Amt, Zero, ISD::SETEQ); + SDValue Res = DAG.getNode(ISD::MULHU, dl, VT, R, Scale); + return DAG.getSelect(dl, VT, ZAmt, R, Res); + } + } + // v4i32 Non Uniform Shifts. // If the shift amount is constant we can shift each lane using the SSE2 // immediate shifts, else we need to zero-extend each lane to the lower i64 |

