diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-10-15 16:41:17 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-10-15 16:41:17 +0000 |
| commit | a5af4a64d07017cb7ebbcb615d330ef29235094a (patch) | |
| tree | 6e4756271d7a1201e6427a9224c3dbffdf47c997 /llvm/lib/Target | |
| parent | a1f9c9dd8be51ba6b08dd556f166a52b66cf1bc9 (diff) | |
| download | bcm5719-llvm-a5af4a64d07017cb7ebbcb615d330ef29235094a.tar.gz bcm5719-llvm-a5af4a64d07017cb7ebbcb615d330ef29235094a.zip | |
[AVX512] Don't mark EXTLOAD as legal with AVX512. Continue using custom lowering.
Summary:
This was impeding our ability to combine the extending shuffles with other shuffles, as you can see from the test changes.
There's one special case that needed to be added to use VZEXT directly for v8i8->v8i64 since the custom lowering requires v64i8.
Reviewers: RKSimon, zvi, delena
Reviewed By: delena
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D38714
llvm-svn: 315860
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 34 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 40 |
2 files changed, 15 insertions, 59 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f155de8ef02..6baef042872 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1140,7 +1140,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, for (MVT VT : MVT::fp_vector_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal); - for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) { + for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal); setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal); setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal); @@ -1244,18 +1244,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom); - - // FIXME. This commands are available on SSE/AVX2, add relevant patterns. 
- setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i8, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i16, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i16, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i8, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i16, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i32, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i8, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i16, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i32, Legal); } setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); @@ -1515,13 +1503,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64); } - for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) { + for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal); - if (Subtarget.hasVLX()) { - // FIXME. This commands are available on SSE/AVX2, add relevant patterns. - setLoadExtAction(ExtType, MVT::v16i16, MVT::v16i8, Legal); - setLoadExtAction(ExtType, MVT::v8i16, MVT::v8i8, Legal); - } } } @@ -18439,6 +18422,12 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget, if (Ext == ISD::SEXTLOAD && RegSz >= 256) loadRegZize = 128; + // If we don't have BWI we won't be able to create the shuffle needed for + // v8i8->v8i64. + if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 && + MemVT == MVT::v8i8) + loadRegZize = 128; + // Represent our vector as a sequence of elements which are the // largest scalar that we can load. 
EVT LoadUnitVecVT = EVT::getVectorVT( @@ -18505,6 +18494,13 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget, return Shuff; } + if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 && + MemVT == MVT::v8i8) { + SDValue Sext = getExtendInVec(X86ISD::VZEXT, dl, RegVT, SlicedVec, DAG); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF); + return Sext; + } + // Redistribute the loaded elements into the different locations. SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index cdc364970ab..27294d51f55 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -8006,46 +8006,6 @@ defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s">; defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s">; defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s">; -// EXTLOAD patterns, implemented using vpmovz -multiclass avx512_ext_lowering<string InstrStr, X86VectorVTInfo To, - X86VectorVTInfo From, PatFrag LdFrag> { - def : Pat<(To.VT (LdFrag addr:$src)), - (!cast<Instruction>("VPMOVZX"#InstrStr#"rm") addr:$src)>; - def : Pat<(To.VT (vselect To.KRCWM:$mask, (LdFrag addr:$src), To.RC:$src0)), - (!cast<Instruction>("VPMOVZX"#InstrStr#"rmk") To.RC:$src0, - To.KRC:$mask, addr:$src)>; - def : Pat<(To.VT (vselect To.KRCWM:$mask, (LdFrag addr:$src), - To.ImmAllZerosV)), - (!cast<Instruction>("VPMOVZX"#InstrStr#"rmkz") To.KRC:$mask, - addr:$src)>; -} - -let Predicates = [HasVLX, HasBWI] in { - defm : avx512_ext_lowering<"BWZ128", v8i16x_info, v16i8x_info, extloadvi8>; - defm : avx512_ext_lowering<"BWZ256", v16i16x_info, v16i8x_info, extloadvi8>; -} -let Predicates = [HasBWI] in { - defm : avx512_ext_lowering<"BWZ", v32i16_info, v32i8x_info, extloadvi8>; -} -let Predicates = [HasVLX, HasAVX512] in { - 
defm : avx512_ext_lowering<"BDZ128", v4i32x_info, v16i8x_info, extloadvi8>; - defm : avx512_ext_lowering<"BDZ256", v8i32x_info, v16i8x_info, extloadvi8>; - defm : avx512_ext_lowering<"BQZ128", v2i64x_info, v16i8x_info, extloadvi8>; - defm : avx512_ext_lowering<"BQZ256", v4i64x_info, v16i8x_info, extloadvi8>; - defm : avx512_ext_lowering<"WDZ128", v4i32x_info, v8i16x_info, extloadvi16>; - defm : avx512_ext_lowering<"WDZ256", v8i32x_info, v8i16x_info, extloadvi16>; - defm : avx512_ext_lowering<"WQZ128", v2i64x_info, v8i16x_info, extloadvi16>; - defm : avx512_ext_lowering<"WQZ256", v4i64x_info, v8i16x_info, extloadvi16>; - defm : avx512_ext_lowering<"DQZ128", v2i64x_info, v4i32x_info, extloadvi32>; - defm : avx512_ext_lowering<"DQZ256", v4i64x_info, v4i32x_info, extloadvi32>; -} -let Predicates = [HasAVX512] in { - defm : avx512_ext_lowering<"BDZ", v16i32_info, v16i8x_info, extloadvi8>; - defm : avx512_ext_lowering<"BQZ", v8i64_info, v16i8x_info, extloadvi8>; - defm : avx512_ext_lowering<"WDZ", v16i32_info, v16i16x_info, extloadvi16>; - defm : avx512_ext_lowering<"WQZ", v8i64_info, v8i16x_info, extloadvi16>; - defm : avx512_ext_lowering<"DQZ", v8i64_info, v8i32x_info, extloadvi32>; -} multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp, SDNode InVecOp, PatFrag ExtLoad16> { |

