summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2015-04-06 18:39:00 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2015-04-06 18:39:00 +0000
commit49ba9b8e2ffcbe78de5971074a76a60191d59d86 (patch)
tree3992764d1f169dcd21c9df44e36188936bafb049 /llvm/lib/Target
parent7431fb0257a7c7921f3928a8699adb97b4e22936 (diff)
downloadbcm5719-llvm-49ba9b8e2ffcbe78de5971074a76a60191d59d86.tar.gz
bcm5719-llvm-49ba9b8e2ffcbe78de5971074a76a60191d59d86.zip
[X86][SSE] Use (V)PINSRB for direct byte insertion in 16i8 buildvector on SSE4.1 targets
This patch allows SSE4.1 targets to use (V)PINSRB to create 16i8 vectors by inserting i8 scalars directly into a XMM register instead of merging pairs of i8 scalars into a i16 and using the SSE2 PINSRW instruction. This allows folding of byte loads and reduces scalar register usage as well. Differential Revision: http://reviews.llvm.org/D8839 llvm-svn: 234193
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp23
1 files changed, 23 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3055256605a..2101724588b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4460,6 +4460,29 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
SDLoc dl(Op);
SDValue V;
bool First = true;
+
+ // SSE4.1 - use PINSRB to insert each byte directly.
+ if (Subtarget->hasSSE41()) {
+ for (unsigned i = 0; i < 16; ++i) {
+ bool isNonZero = (NonZeros & (1 << i)) != 0;
+ if (isNonZero) {
+ if (First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v16i8);
+ First = false;
+ }
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ MVT::v16i8, V, Op.getOperand(i),
+ DAG.getIntPtrConstant(i));
+ }
+ }
+
+ return V;
+ }
+
+ // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
OpenPOWER on IntegriCloud