summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author Chandler Carruth <chandlerc@gmail.com> 2014-09-04 01:13:48 +0000
committer Chandler Carruth <chandlerc@gmail.com> 2014-09-04 01:13:48 +0000
commit fc0db222b5a13f988f8e4d7afd71204e8f03175d (patch)
tree de52961bd95e7d6b06a515852560ec322da1cbd2 /llvm/lib/Target
parent beed821ffb058c164a885f96f2308c71cc7b632c (diff)
download bcm5719-llvm-fc0db222b5a13f988f8e4d7afd71204e8f03175d.tar.gz
bcm5719-llvm-fc0db222b5a13f988f8e4d7afd71204e8f03175d.zip
[x86] Teach the new vector shuffle lowering about the zero masking
abilities of INSERTPS, which are really powerful and come up in very
important contexts such as forming diagonal matrices, etc.

With this I ended up being able to remove the somewhat weird helper I added
for INSERTPS because we can collapse the entire state to a no-op mask. Added
a bunch of tests for inserting into a zero-ish vector.

llvm-svn: 217117
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 64
1 file changed, 42 insertions, 22 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8b102e4fbb9..c6f73baeb2a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7182,21 +7182,6 @@ static bool isSingleInputShuffleMask(ArrayRef<int> Mask) {
return true;
}
-/// \brief Check wether all of one set of inputs to a shuffle mask are in place.
-///
-/// Mask entries pointing at the other input or undef will be skipped.
-static bool isShuffleMaskInputInPlace(ArrayRef<int> Mask, bool LoInput = true) {
- int Size = Mask.size();
- for (int i = 0; i < Size; ++i) {
- int M = Mask[i];
- if (M == -1 || (LoInput && M >= 4) || (!LoInput && M < 4))
- continue;
- if (M - (LoInput ? 0 : Size) != i)
- return false;
- }
- return true;
-}
-
// Hide this symbol with an anonymous namespace instead of 'static' so that MSVC
// 2013 will allow us to use it as a non-type template parameter.
namespace {
@@ -7385,13 +7370,48 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// INSERTPS when the V1 elements are already in the correct locations
// because otherwise we can just always use two SHUFPS instructions which
// are much smaller to encode than a SHUFPS and an INSERTPS.
- if (Subtarget->hasSSE41() &&
- isShuffleMaskInputInPlace(Mask, /*LoInput*/ true)) {
- // Insert the V2 element into the desired position.
- SDValue InsertPSMask =
- DAG.getIntPtrConstant(Mask[V2Index] << 6 | V2Index << 4);
- return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
- InsertPSMask);
+ if (Subtarget->hasSSE41()) {
+ // When using INSERTPS we can zero any lane of the destination. Collect
+ // the zero inputs into a mask and drop them from the lanes of V1 which
+ // actually need to be present as inputs to the INSERTPS.
+ unsigned ZMask = 0;
+ if (ISD::isBuildVectorAllZeros(V1.getNode())) {
+ ZMask = 0xF ^ (1 << V2Index);
+ } else if (V1.getOpcode() == ISD::BUILD_VECTOR) {
+ for (int i = 0; i < 4; ++i) {
+ int M = Mask[i];
+ if (M >= 4)
+ continue;
+ if (M > -1) {
+ SDValue Input = V1.getOperand(M);
+ if (Input.getOpcode() != ISD::UNDEF &&
+ !X86::isZeroNode(Input)) {
+ // A non-zero input!
+ ZMask = 0;
+ break;
+ }
+ }
+ ZMask |= 1 << i;
+ }
+ }
+
+ // Synthesize a shuffle mask for the non-zero and non-v2 inputs.
+ int InsertShuffleMask[4] = {-1, -1, -1, -1};
+ for (int i = 0; i < 4; ++i)
+ if (i != V2Index && (ZMask & (1 << i)) == 0)
+ InsertShuffleMask[i] = Mask[i];
+
+ if (isNoopShuffleMask(InsertShuffleMask)) {
+ // Replace V1 with undef if nothing from V1 survives the INSERTPS.
+ if ((ZMask | 1 << V2Index) == 0xF)
+ V1 = DAG.getUNDEF(MVT::v4f32);
+
+ // Insert the V2 element into the desired position.
+ SDValue InsertPSMask =
+ DAG.getIntPtrConstant(Mask[V2Index] << 6 | V2Index << 4 | ZMask);
+ return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
+ InsertPSMask);
+ }
}
// Compute the index adjacent to V2Index and in the same half by toggling
OpenPOWER on IntegriCloud