summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2015-02-19 15:06:13 +0000
committerChandler Carruth <chandlerc@gmail.com>2015-02-19 15:06:13 +0000
commit0b395363906e729392a1c398585e760e08ea955b (patch)
tree2ae8254f118a0bd305cfdb3e44d5e471be6414ee /llvm/lib
parent3c9fb5221863f6d4946de55ed26e76e51f0efa68 (diff)
downloadbcm5719-llvm-0b395363906e729392a1c398585e760e08ea955b.tar.gz
bcm5719-llvm-0b395363906e729392a1c398585e760e08ea955b.zip
[x86] Teach the unpack lowering how to lower with an initial unpack in
addition to lowering to trees rooted in an unpack. This saves shuffles and or registers in many various ways, lets us handle another class of v4i32 shuffles pre SSE4.1 without domain crosses, etc. llvm-svn: 229856
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp37
1 files changed, 36 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 537773d1cfb..45128fe032a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8467,7 +8467,7 @@ static SDValue lowerVectorShuffleAsUnpack(MVT VT, SDLoc DL, SDValue V1,
return M >= 0 && M % Size < Size / 2;
});
int NumHiInputs = std::count_if(
- Mask.begin(), Mask.end(), [Size](int M) { return M % Size > Size / 2; });
+ Mask.begin(), Mask.end(), [Size](int M) { return M % Size >= Size / 2; });
bool UnpackLo = NumLoInputs >= NumHiInputs;
@@ -8494,6 +8494,12 @@ static SDValue lowerVectorShuffleAsUnpack(MVT VT, SDLoc DL, SDValue V1,
Mask[i] % Size;
}
+ // If we will have to shuffle both inputs to use the unpack, check whether
+ // we can just unpack first and shuffle the result. If so, skip this unpack.
+ if ((NumLoInputs == 0 || NumHiInputs == 0) && !isNoopShuffleMask(V1Mask) &&
+ !isNoopShuffleMask(V2Mask))
+ return SDValue();
+
// Shuffle the inputs into place.
V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
@@ -8520,6 +8526,35 @@ static SDValue lowerVectorShuffleAsUnpack(MVT VT, SDLoc DL, SDValue V1,
return Unpack;
}
+ // If none of the unpack-rooted lowerings worked (or were profitable) try an
+ // initial unpack.
+ if (NumLoInputs == 0 || NumHiInputs == 0) {
+ assert((NumLoInputs > 0 || NumHiInputs > 0) &&
+ "We have to have *some* inputs!");
+ int HalfOffset = NumLoInputs == 0 ? Size / 2 : 0;
+
+ // FIXME: We could consider the total complexity of the permute of each
+ // possible unpacking. Or at the least we should consider how many
+ // half-crossings are created.
+ // FIXME: We could consider commuting the unpacks.
+
+ SmallVector<int, 32> PermMask;
+ PermMask.assign(Size, -1);
+ for (int i = 0; i < Size; ++i) {
+ if (Mask[i] < 0)
+ continue;
+
+ assert(Mask[i] % Size >= HalfOffset && "Found input from wrong half!");
+
+ PermMask[i] =
+ 2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1);
+ }
+ return DAG.getVectorShuffle(
+ VT, DL, DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL,
+ DL, VT, V1, V2),
+ DAG.getUNDEF(VT), PermMask);
+ }
+
return SDValue();
}
OpenPOWER on IntegriCloud