diff options
author | Craig Topper <craig.topper@intel.com> | 2018-11-18 05:53:21 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-11-18 05:53:21 +0000 |
commit | f56a57518d8965697109b24dd83124c2064cc6e7 (patch) | |
tree | ca6a406ab846782761b6f0c5ee6002031d69f356 /llvm/lib/Target/X86/X86ISelLowering.cpp | |
parent | ab7781493d9edd2ad92e896ec310eb95dbf69d41 (diff) | |
download | bcm5719-llvm-f56a57518d8965697109b24dd83124c2064cc6e7.tar.gz bcm5719-llvm-f56a57518d8965697109b24dd83124c2064cc6e7.zip |
[X86] Don't use a pmaddwd for vXi32 multiply if the inputs are zero extends from i8 or smaller without SSE4.1. Prefer to shrink the mul instead.
The zero extend will require two stages of unpacks to implement. So it's better to shrink the multiply using pmullw and then extend that result back to v4i32 using a single unpack.
llvm-svn: 347149
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 10 |
1 files changed, 10 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 675a48fbed4..7df0fa1ffc8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34678,6 +34678,16 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG, SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); + + // If we are zero extending two steps without SSE4.1, its better to reduce + // the vmul width instead. + if (!Subtarget.hasSSE41() && + (N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getOperand(0).getScalarValueSizeInBits() <= 8) && + (N1.getOpcode() == ISD::ZERO_EXTEND && + N1.getOperand(0).getScalarValueSizeInBits() <= 8)) + return SDValue(); + APInt Mask17 = APInt::getHighBitsSet(32, 17); if (!DAG.MaskedValueIsZero(N1, Mask17) || !DAG.MaskedValueIsZero(N0, Mask17)) |