diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 14 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 12 |
2 files changed, 18 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2fca8e742b3..edefa158376 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -17789,15 +17789,13 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { // vpmovqb/w/d, vpmovdb/w, vpmovwb if (Subtarget.hasAVX512()) { - // word to byte only under BWI - if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) { // v16i16 -> v16i8 - // Make sure we're allowed to promote 512-bits. - if (Subtarget.canExtendTo512DQ()) - return DAG.getNode(ISD::TRUNCATE, DL, VT, - DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In)); - } else { + // word to byte only under BWI. Otherwise we have to promote to v16i32 + // and then truncate that. But we should only do that if we haven't been + // asked to avoid 512-bit vectors. The actual promotion to v16i32 will be + // handled by isel patterns. + if (InVT != MVT::v16i16 || Subtarget.hasBWI() || + Subtarget.canExtendTo512DQ()) return Op; - } } unsigned NumPackedSignBits = std::min<unsigned>(VT.getScalarSizeInBits(), 16); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index f8ade37f8df..734c1de8017 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -9774,6 +9774,18 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp, defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>; defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>; +// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge +// ext+trunc aggressively making it impossible to legalize the DAG to this +// pattern directly. 
+let Predicates = [HasAVX512, NoBWI] in { +def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))), + (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>; +def: Pat<(v16i8 (trunc (bc_v16i16 (loadv4i64 addr:$src)))), + (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>; +def: Pat<(store (v16i8 (trunc (v16i16 VR256X:$src))), addr:$dst), + (VPMOVDBZmr addr:$dst, (v16i32 (VPMOVZXWDZrr VR256X:$src)))>; +} + //===----------------------------------------------------------------------===// // GATHER - SCATTER Operations |