summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-11-18 05:53:21 +0000
committerCraig Topper <craig.topper@intel.com>2018-11-18 05:53:21 +0000
commitf56a57518d8965697109b24dd83124c2064cc6e7 (patch)
treeca6a406ab846782761b6f0c5ee6002031d69f356 /llvm/lib/Target/X86/X86ISelLowering.cpp
parentab7781493d9edd2ad92e896ec310eb95dbf69d41 (diff)
downloadbcm5719-llvm-f56a57518d8965697109b24dd83124c2064cc6e7.tar.gz
bcm5719-llvm-f56a57518d8965697109b24dd83124c2064cc6e7.zip
[X86] Don't use a pmaddwd for vXi32 multiply if the inputs are zero extends from i8 or smaller without SSE4.1. Prefer to shrink the mul instead.
The zero extend will require two stages of unpacks to implement. So its better to shrink the multiply using pmullw and then extend that result back to v4i32 using a single unpack. llvm-svn: 347149
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp10
1 files changed, 10 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 675a48fbed4..7df0fa1ffc8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34678,6 +34678,16 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+
+ // If we are zero extending two steps without SSE4.1, its better to reduce
+ // the vmul width instead.
+ if (!Subtarget.hasSSE41() &&
+ (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getScalarValueSizeInBits() <= 8) &&
+ (N1.getOpcode() == ISD::ZERO_EXTEND &&
+ N1.getOperand(0).getScalarValueSizeInBits() <= 8))
+ return SDValue();
+
APInt Mask17 = APInt::getHighBitsSet(32, 17);
if (!DAG.MaskedValueIsZero(N1, Mask17) ||
!DAG.MaskedValueIsZero(N0, Mask17))
OpenPOWER on IntegriCloud