summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2018-08-27 17:20:41 +0000
committer: Craig Topper <craig.topper@intel.com> 2018-08-27 17:20:41 +0000
commit: 4be11c05850640668c28ab45cc922f9899a76717 (patch)
tree: af0c592452c2cc3915e8b44ee7ace6badbb2a808 /llvm/lib/Target
parent: fff90377fd34889bf443c28517d982fbaf41bb9d (diff)
download: bcm5719-llvm-4be11c05850640668c28ab45cc922f9899a76717.tar.gz
          bcm5719-llvm-4be11c05850640668c28ab45cc922f9899a76717.zip
[X86] When lowering v32i8 MULHS/MULHU, shuffle after the PACKUS rather than before.
We're using a 256-bit PACKUS to do the truncation, but that instruction operates on 128-bit lanes. So previously we shuffled first to rearrange the lanes. But that requires 2 shuffles. Instead we can shuffle after the PACKUS using a single VPERMQ. This matches what our normal LowerTRUNCATE code does when it uses PACKUS.

Differential Revision: https://reviews.llvm.org/D51284

llvm-svn: 340757
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 17
1 file changed, 7 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dd7207b70f0..823c2719737 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23020,16 +23020,13 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
Hi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Lo, 8, DAG);
Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Hi, 8, DAG);
- // The ymm variant of PACKUS treats the 128-bit lanes separately, so
- // before using PACKUS we need to permute the inputs to the correct lo/hi
- // xmm lane.
- const int LoMask[] = {0, 1, 2, 3, 4, 5, 6, 7,
- 16, 17, 18, 19, 20, 21, 22, 23};
- const int HiMask[] = {8, 9, 10, 11, 12, 13, 14, 15,
- 24, 25, 26, 27, 28, 29, 30, 31};
- return DAG.getNode(X86ISD::PACKUS, dl, VT,
- DAG.getVectorShuffle(ExVT, dl, Lo, Hi, LoMask),
- DAG.getVectorShuffle(ExVT, dl, Lo, Hi, HiMask));
+
+ SDValue Res = DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
+ // The ymm variant of PACKUS treats the 128-bit lanes separately, so we
+ // need to permute the final result into place.
+ Res = DAG.getBitcast(MVT::v4i64, Res);
+ Res = DAG.getVectorShuffle(MVT::v4i64, dl, Res, Res, { 0, 2, 1, 3 });
+ return DAG.getBitcast(VT, Res);
}
assert(VT == MVT::v16i8 && "Unexpected VT");
OpenPOWER on IntegriCloud