summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-02-05 22:50:29 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-02-05 22:50:29 +0000
commit380ce75687e9f3fc00d52984dd9d64c11a8ad6eb (patch)
tree3f2c81e191fb4ab89f6c70fc99890e5720ad0e25 /llvm/lib/Target/X86/X86ISelLowering.cpp
parent134ed9986a1baa03d568dd551e20d2134d3dfeae (diff)
downloadbcm5719-llvm-380ce75687e9f3fc00d52984dd9d64c11a8ad6eb.tar.gz
bcm5719-llvm-380ce75687e9f3fc00d52984dd9d64c11a8ad6eb.zip
[X86][SSE] Replace insert_vector_elt(vec, -1, idx) with shuffle
Similar to what we already do for zero elt insertion, we can quickly rematerialize 'allbits' vectors so to avoid a unnecessary gpr value and insertion into a vector llvm-svn: 294162
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp20
1 files changed, 12 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1aa7c422012..af06cd3a719 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13844,17 +13844,21 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
auto *N2C = cast<ConstantSDNode>(N2);
unsigned IdxVal = N2C->getZExtValue();
- // If we are clearing out a element, we do this more efficiently with a
- // blend shuffle than a costly integer insertion.
- // TODO: would other rematerializable values (e.g. allbits) benefit as well?
+ bool IsZeroElt = X86::isZeroNode(N1);
+ bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);
+
+ // If we are inserting a element, see if we can do this more efficiently with
+ // a blend shuffle with a rematerializable vector than a costly integer
+ // insertion.
// TODO: pre-SSE41 targets will tend to use bit masking - this could still
// be beneficial if we are inserting several zeros and can combine the masks.
- if (X86::isZeroNode(N1) && Subtarget.hasSSE41() && NumElts <= 8) {
- SmallVector<int, 8> ClearMask;
+ if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() && NumElts <= 8) {
+ SmallVector<int, 8> BlendMask;
for (unsigned i = 0; i != NumElts; ++i)
- ClearMask.push_back(i == IdxVal ? i + NumElts : i);
- SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, dl);
- return DAG.getVectorShuffle(VT, dl, N0, ZeroVector, ClearMask);
+ BlendMask.push_back(i == IdxVal ? i + NumElts : i);
+ SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl)
+ : DAG.getConstant(-1, dl, VT);
+ return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask);
}
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
OpenPOWER on IntegriCloud