summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/X86
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2017-10-15 16:41:17 +0000
committer: Craig Topper <craig.topper@intel.com> 2017-10-15 16:41:17 +0000
commit: a5af4a64d07017cb7ebbcb615d330ef29235094a (patch)
tree: 6e4756271d7a1201e6427a9224c3dbffdf47c997 /llvm/lib/Target/X86
parent: a1f9c9dd8be51ba6b08dd556f166a52b66cf1bc9 (diff)
download: bcm5719-llvm-a5af4a64d07017cb7ebbcb615d330ef29235094a.tar.gz
bcm5719-llvm-a5af4a64d07017cb7ebbcb615d330ef29235094a.zip
[AVX512] Don't mark EXTLOAD as legal with AVX512. Continue using custom lowering.
Summary: This was impeding our ability to combine the extending shuffles with other shuffles, as you can see from the test changes. There's one special case that needed to be added to use VZEXT directly for v8i8->v8i64 since the custom lowering requires v64i8.
Reviewers: RKSimon, zvi, delena
Reviewed By: delena
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D38714
llvm-svn: 315860
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 34
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td 40
2 files changed, 15 insertions, 59 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f155de8ef02..6baef042872 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1140,7 +1140,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
- for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
+ for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
@@ -1244,18 +1244,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
-
- // FIXME. This commands are available on SSE/AVX2, add relevant patterns.
- setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
- setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
- setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
- setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
- setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
}
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
@@ -1515,13 +1503,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
}
- for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
+ for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
- if (Subtarget.hasVLX()) {
- // FIXME. This commands are available on SSE/AVX2, add relevant patterns.
- setLoadExtAction(ExtType, MVT::v16i16, MVT::v16i8, Legal);
- setLoadExtAction(ExtType, MVT::v8i16, MVT::v8i8, Legal);
- }
}
}
@@ -18439,6 +18422,12 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget,
if (Ext == ISD::SEXTLOAD && RegSz >= 256)
loadRegZize = 128;
+ // If we don't have BWI we won't be able to create the shuffle needed for
+ // v8i8->v8i64.
+ if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
+ MemVT == MVT::v8i8)
+ loadRegZize = 128;
+
// Represent our vector as a sequence of elements which are the
// largest scalar that we can load.
EVT LoadUnitVecVT = EVT::getVectorVT(
@@ -18505,6 +18494,13 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget,
return Shuff;
}
+ if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
+ MemVT == MVT::v8i8) {
+ SDValue Sext = getExtendInVec(X86ISD::VZEXT, dl, RegVT, SlicedVec, DAG);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
+ return Sext;
+ }
+
// Redistribute the loaded elements into the different locations.
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index cdc364970ab..27294d51f55 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -8006,46 +8006,6 @@ defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s">;
defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s">;
defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s">;
-// EXTLOAD patterns, implemented using vpmovz
-multiclass avx512_ext_lowering<string InstrStr, X86VectorVTInfo To,
- X86VectorVTInfo From, PatFrag LdFrag> {
- def : Pat<(To.VT (LdFrag addr:$src)),
- (!cast<Instruction>("VPMOVZX"#InstrStr#"rm") addr:$src)>;
- def : Pat<(To.VT (vselect To.KRCWM:$mask, (LdFrag addr:$src), To.RC:$src0)),
- (!cast<Instruction>("VPMOVZX"#InstrStr#"rmk") To.RC:$src0,
- To.KRC:$mask, addr:$src)>;
- def : Pat<(To.VT (vselect To.KRCWM:$mask, (LdFrag addr:$src),
- To.ImmAllZerosV)),
- (!cast<Instruction>("VPMOVZX"#InstrStr#"rmkz") To.KRC:$mask,
- addr:$src)>;
-}
-
-let Predicates = [HasVLX, HasBWI] in {
- defm : avx512_ext_lowering<"BWZ128", v8i16x_info, v16i8x_info, extloadvi8>;
- defm : avx512_ext_lowering<"BWZ256", v16i16x_info, v16i8x_info, extloadvi8>;
-}
-let Predicates = [HasBWI] in {
- defm : avx512_ext_lowering<"BWZ", v32i16_info, v32i8x_info, extloadvi8>;
-}
-let Predicates = [HasVLX, HasAVX512] in {
- defm : avx512_ext_lowering<"BDZ128", v4i32x_info, v16i8x_info, extloadvi8>;
- defm : avx512_ext_lowering<"BDZ256", v8i32x_info, v16i8x_info, extloadvi8>;
- defm : avx512_ext_lowering<"BQZ128", v2i64x_info, v16i8x_info, extloadvi8>;
- defm : avx512_ext_lowering<"BQZ256", v4i64x_info, v16i8x_info, extloadvi8>;
- defm : avx512_ext_lowering<"WDZ128", v4i32x_info, v8i16x_info, extloadvi16>;
- defm : avx512_ext_lowering<"WDZ256", v8i32x_info, v8i16x_info, extloadvi16>;
- defm : avx512_ext_lowering<"WQZ128", v2i64x_info, v8i16x_info, extloadvi16>;
- defm : avx512_ext_lowering<"WQZ256", v4i64x_info, v8i16x_info, extloadvi16>;
- defm : avx512_ext_lowering<"DQZ128", v2i64x_info, v4i32x_info, extloadvi32>;
- defm : avx512_ext_lowering<"DQZ256", v4i64x_info, v4i32x_info, extloadvi32>;
-}
-let Predicates = [HasAVX512] in {
- defm : avx512_ext_lowering<"BDZ", v16i32_info, v16i8x_info, extloadvi8>;
- defm : avx512_ext_lowering<"BQZ", v8i64_info, v16i8x_info, extloadvi8>;
- defm : avx512_ext_lowering<"WDZ", v16i32_info, v16i16x_info, extloadvi16>;
- defm : avx512_ext_lowering<"WQZ", v8i64_info, v8i16x_info, extloadvi16>;
- defm : avx512_ext_lowering<"DQZ", v8i64_info, v8i32x_info, extloadvi32>;
-}
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
SDNode InVecOp, PatFrag ExtLoad16> {
OpenPOWER on IntegriCloud