diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-08-27 17:20:41 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-08-27 17:20:41 +0000 |
| commit | 4be11c05850640668c28ab45cc922f9899a76717 (patch) | |
| tree | af0c592452c2cc3915e8b44ee7ace6badbb2a808 /llvm/lib/Target | |
| parent | fff90377fd34889bf443c28517d982fbaf41bb9d (diff) | |
| download | bcm5719-llvm-4be11c05850640668c28ab45cc922f9899a76717.tar.gz bcm5719-llvm-4be11c05850640668c28ab45cc922f9899a76717.zip | |
[X86] When lowering v32i8 MULHS/MULHU, shuffle after the PACKUS rather than before.
We're using a 256-bit PACKUS to do the truncation, but that instruction operates on each 128-bit lane independently. Previously we shuffled the inputs first to rearrange the lanes, but that requires 2 shuffles. Instead, we can shuffle after the PACKUS using a single VPERMQ. This matches what our normal LowerTRUNCATE code does when it uses PACKUS.
Differential Revision: https://reviews.llvm.org/D51284
llvm-svn: 340757
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 17 |
1 file changed, 7 insertions, 10 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dd7207b70f0..823c2719737 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23020,16 +23020,13 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
     Hi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
     Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Lo, 8, DAG);
     Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Hi, 8, DAG);
-    // The ymm variant of PACKUS treats the 128-bit lanes separately, so
-    // before using PACKUS we need to permute the inputs to the correct lo/hi
-    // xmm lane.
-    const int LoMask[] = {0, 1, 2, 3, 4, 5, 6, 7,
-                          16, 17, 18, 19, 20, 21, 22, 23};
-    const int HiMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
-                          24, 25, 26, 27, 28, 29, 30, 31};
-    return DAG.getNode(X86ISD::PACKUS, dl, VT,
-                       DAG.getVectorShuffle(ExVT, dl, Lo, Hi, LoMask),
-                       DAG.getVectorShuffle(ExVT, dl, Lo, Hi, HiMask));
+
+    SDValue Res = DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
+    // The ymm variant of PACKUS treats the 128-bit lanes separately, so we
+    // need to permute the final result into place.
+    Res = DAG.getBitcast(MVT::v4i64, Res);
+    Res = DAG.getVectorShuffle(MVT::v4i64, dl, Res, Res, { 0, 2, 1, 3 });
+    return DAG.getBitcast(VT, Res);
   }
 
   assert(VT == MVT::v16i8 && "Unexpected VT");
```

