summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2018-01-14 02:05:51 +0000
committer: Craig Topper <craig.topper@intel.com> 2018-01-14 02:05:51 +0000
commit: e9fc0cd920f98e6726f5874634c2a5d4fa0e6f5c (patch)
tree: 8bd0dff17227e31e392a4de88f6d665b0ed28353 /llvm/lib/Target
parent: 7a3b10184bd39d234e4c0a9ed41be2b110d0048d (diff)
download: bcm5719-llvm-e9fc0cd920f98e6726f5874634c2a5d4fa0e6f5c.tar.gz
bcm5719-llvm-e9fc0cd920f98e6726f5874634c2a5d4fa0e6f5c.zip
[X86] Improve legalization of vXi16/vXi8 selects.
Extend vXi1 conditions of vXi8/vXi16 selects even before type legalization gets a chance to split wide vectors. Previously we would only extend 128- and 256-bit vectors. But if we started with a 512-bit or wider vector that needed to be split, we wouldn't extend until after the split had taken place. By extending early we improve the results of type legalization. Don't widen the condition of 128/256-bit vXi16/vXi8 selects when we have BWI but not VLX. We can still use a mask register by widening the select to 512 bits instead. This is similar to what we already do for compares. llvm-svn: 322450
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 11
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td 8
2 files changed, 14 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f30ce5f206f..f8e0a939983 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31508,14 +31508,15 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// v16i8 (select v16i1, v16i8, v16i8) does not have a proper
// lowering on KNL. In this case we convert it to
// v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.
- // The same situation for all 128 and 256-bit vectors of i8 and i16.
+ // The same situation applies to all vectors of i8 and i16 without BWI.
+ // Make sure we extend these even before type legalization gets a chance to
+ // split wide vectors.
// Since SKX these selects have a proper lowering.
- if (Subtarget.hasAVX512() && CondVT.isVector() &&
+ if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1 &&
- (VT.is128BitVector() || VT.is256BitVector()) &&
+ VT.getVectorNumElements() > 4 &&
(VT.getVectorElementType() == MVT::i8 ||
- VT.getVectorElementType() == MVT::i16) &&
- !(Subtarget.hasBWI() && Subtarget.hasVLX())) {
+ VT.getVectorElementType() == MVT::i16)) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
DCI.AddToWorklist(Cond.getNode());
return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index d5b73ceb246..d8c536380bc 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3437,6 +3437,14 @@ let Predicates = [HasAVX512, NoVLX] in {
defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}
+let Predicates = [HasBWI, NoVLX] in {
+ defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
+ defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
+
+ defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
+ defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
+}
+
let Predicates = [HasAVX512] in {
// 512-bit store.
def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
OpenPOWER on IntegriCloud