diff options
author | Craig Topper <craig.topper@intel.com> | 2019-10-12 07:59:29 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2019-10-12 07:59:29 +0000 |
commit | 9bd542dcd5b8d681a535fa2b5c5d0cf7193be0e1 (patch) | |
tree | 4c7d844327000e8db2f682d4f421498933d450e6 /llvm/lib/Target/X86 | |
parent | 80a4feed7c4882e1c97e7ff3534422eb0a394621 (diff) | |
download | bcm5719-llvm-9bd542dcd5b8d681a535fa2b5c5d0cf7193be0e1.tar.gz bcm5719-llvm-9bd542dcd5b8d681a535fa2b5c5d0cf7193be0e1.zip |
[X86] Use pack instructions for packus/ssat truncate patterns when 256-bit is the largest legal vector and the result type is at least 256 bits.
Since the input type is larger than 256 bits, we'll need to do some
concatenating to reassemble the results. The pack instructions'
ability to concatenate while packing makes this a shorter/faster
sequence.
llvm-svn: 374643
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 5 |
1 files changed, 4 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 81df19b827f..6838dbbd08d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -39869,9 +39869,12 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, // vXi16 truncate instructions are only available with AVX512BW. // For 256-bit or smaller vectors, we require VLX. // FIXME: We could widen truncates to 512 to remove the VLX restriction. + // If the result type is 256-bits or larger and we have disable 512-bit + // registers, we should go ahead and use the pack instructions if possible. bool PreferAVX512 = ((Subtarget.hasAVX512() && InSVT == MVT::i32) || (Subtarget.hasBWI() && InSVT == MVT::i16)) && - (Subtarget.hasVLX() || InVT.getSizeInBits() > 256); + (Subtarget.hasVLX() || InVT.getSizeInBits() > 256) && + !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256); if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) && !PreferAVX512 && |