diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-21 14:54:17 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-07-21 14:54:17 +0000 |
| commit | 88e0940d3bde4bf5c2a2997895830f4826174fd2 (patch) | |
| tree | aeaf06e123b72728b11273056def0ee75f839df3 /llvm/lib | |
| parent | 5ad891f7193b2d7cc6578c2cbffe7d3a04e4617b (diff) | |
| download | bcm5719-llvm-88e0940d3bde4bf5c2a2997895830f4826174fd2.tar.gz bcm5719-llvm-88e0940d3bde4bf5c2a2997895830f4826174fd2.zip | |
[X86][SSE] Allow folding of store/zext with PEXTRW of 0'th element
Under normal circumstances we prefer the higher performance MOVD to extract the 0'th element of a v8i16 vector instead of PEXTRW.
But as detailed in PR27265, this prevents the SSE41 implementation of PEXTRW from folding the store of the 0'th element. Additionally, it prevents us from making use of the fact that the (SSE2) reg-reg version of PEXTRW implicitly zero-extends the i16 element to the i32/i64 destination register.
This patch only preferentially lowers to MOVD if doing so will neither block a zero-extension of the extracted i16 nor prevent a store from being folded (on SSE41).
Fix for PR27265.
Differential Revision: https://reviews.llvm.org/D22509
llvm-svn: 276289
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 21 |
1 files changed, 15 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cf6e970154e..7e5f0ad09d7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3779,6 +3779,14 @@ static bool MayFoldIntoStore(SDValue Op) { return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); } +static bool MayFoldIntoZeroExtend(SDValue Op) { + if (Op.hasOneUse()) { + unsigned Opcode = Op.getNode()->use_begin()->getOpcode(); + return (ISD::ZERO_EXTEND == Opcode); + } + return false; +} + static bool isTargetShuffle(unsigned Opcode) { switch(Opcode) { default: return false; @@ -12501,12 +12509,13 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MVT VT = Op.getSimpleValueType(); if (VT.getSizeInBits() == 16) { - // If IdxVal is 0, it's cheaper to do a move instead of a pextrw. - if (IdxVal == 0) - return DAG.getNode( - ISD::TRUNCATE, dl, MVT::i16, - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getBitcast(MVT::v4i32, Vec), Idx)); + // If IdxVal is 0, it's cheaper to do a move instead of a pextrw, unless + // we're going to zero extend the register or fold the store (SSE41 only). + if (IdxVal == 0 && !MayFoldIntoZeroExtend(Op) && + !(Subtarget.hasSSE41() && MayFoldIntoStore(Op))) + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, + DAG.getBitcast(MVT::v4i32, Vec), Idx)); // Transform it so it match pextrw which produces a 32-bit result. SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, |

