summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2015-07-28 08:54:41 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2015-07-28 08:54:41 +0000
commitdf984f58adb03397fec171cc6c6ee8e4158dec0f (patch)
tree5eb7d9f564f11c0af2ea6a64ffb59b9ce7729616 /llvm/lib/Target
parent74da38706e17d0ae2d31dae5e25958e5464e041f (diff)
downloadbcm5719-llvm-df984f58adb03397fec171cc6c6ee8e4158dec0f.tar.gz
bcm5719-llvm-df984f58adb03397fec171cc6c6ee8e4158dec0f.zip
[X86][SSE] Use bitmasks instead of shuffles where possible.
VPAND is a lot faster than VPSHUFB and VPBLENDVB - this patch ensures we attempt to lower to a basic bitmask before lowering to the slower byte shuffle/blend instructions. Split off from D11518. Differential Revision: http://reviews.llvm.org/D11541 llvm-svn: 243395
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp8
1 files changed, 8 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 10904775179..b3672c3216d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6663,6 +6663,10 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
assert((VT.getSizeInBits() == 128 || Subtarget->hasAVX2()) &&
"256-bit byte-blends require AVX2 support!");
+ // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
+ if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, DAG))
+ return Masked;
+
// Scale the blend by the number of bytes per element.
int Scale = VT.getScalarSizeInBits() / 8;
@@ -9071,6 +9075,10 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return V;
}
+ if (SDValue Masked =
+ lowerVectorShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ return Masked;
+
// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(V1, V2, Mask, {// Low half.
0, 16, 1, 17, 2, 18, 3, 19,
OpenPOWER on IntegriCloud