diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-02-21 15:09:00 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-02-21 15:09:00 +0000 |
| commit | 3546156122698a4f32ae7babb764a8d47062a5cd (patch) | |
| tree | 2a95c31904de58c35e80bc84fcf2afeba45e5562 /llvm/lib/Target | |
| parent | ae4761c18679f267fdb4ec0d47d5cff816134849 (diff) | |
| download | bcm5719-llvm-3546156122698a4f32ae7babb764a8d47062a5cd.tar.gz bcm5719-llvm-3546156122698a4f32ae7babb764a8d47062a5cd.zip | |
[X86][SSE] Prefer to combine shuffles to VZEXT over VZEXT_MOVL.
This matches what is already done during shuffle lowering and helps prevent the need for a zero-vector in cases where shuffles match both patterns.
llvm-svn: 295723
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 18 |
1 file changed, 9 insertions, 9 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8a9a8fa06bf..3aecdc269b5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26387,15 +26387,6 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
   unsigned NumMaskElts = Mask.size();
   unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
 
-  // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
-  if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
-      isUndefOrEqual(Mask[0], 0) &&
-      isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
-    Shuffle = X86ISD::VZEXT_MOVL;
-    SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
-    return true;
-  }
-
   // Match against a VZEXT instruction.
   // TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
   if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
@@ -26421,6 +26412,15 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
     }
   }
 
+  // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
+  if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
+      isUndefOrEqual(Mask[0], 0) &&
+      isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
+    Shuffle = X86ISD::VZEXT_MOVL;
+    SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
+    return true;
+  }
+
   // Check if we have SSE3 which will let us use MOVDDUP etc. The
   // instructions are no slower than UNPCKLPD but has the option to
   // fold the input operand into even an unaligned memory load.
```

