summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2016-03-13 18:35:59 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2016-03-13 18:35:59 +0000
commit035b19ecf5874649f924e932d05ba450a9a0a540 (patch)
tree196f3ed5d1f83256a47ab945507d79f1667af979 /llvm/lib/Target
parentd27e61c59168626257a648dbc69021a7d8545bbd (diff)
downloadbcm5719-llvm-035b19ecf5874649f924e932d05ba450a9a0a540.tar.gz
bcm5719-llvm-035b19ecf5874649f924e932d05ba450a9a0a540.zip
[X86][SSE41] Avoid variable blend for constant v8i16 shifts
The SSE41 v8i16 shift lowering using (v)pblendvb is great for non-constant shift amounts, but if the shift amount is constant we can efficiently reduce the VSELECT to shuffles by using the pre-SSE41 lowering instead.
llvm-svn: 263383
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp9
1 files changed, 7 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bbbbf3e6537..43784a1c454 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19785,10 +19785,15 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
if (VT == MVT::v8i16) {
unsigned ShiftOpcode = Op->getOpcode();
+ // If we have a constant shift amount, the non-SSE41 path is best as
+ // avoiding bitcasts makes it easier to constant fold and reduce to PBLENDW.
+ bool UseSSE41 = Subtarget.hasSSE41() &&
+ !ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
+
auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) {
// On SSE41 targets we make use of the fact that VSELECT lowers
// to PBLENDVB which selects bytes based just on the sign bit.
- if (Subtarget.hasSSE41()) {
+ if (UseSSE41) {
MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);
V0 = DAG.getBitcast(ExtVT, V0);
V1 = DAG.getBitcast(ExtVT, V1);
@@ -19805,7 +19810,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
};
// Turn 'a' into a mask suitable for VSELECT: a = a << 12;
- if (Subtarget.hasSSE41()) {
+ if (UseSSE41) {
// On SSE41 targets we need to replicate the shift mask in both
// bytes for PBLENDVB.
Amt = DAG.getNode(
OpenPOWER on IntegriCloud