summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author: Sanjay Patel <spatel@rotateright.com> 2019-03-27 22:42:11 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2019-03-27 22:42:11 +0000
commit: 1df0bb6264a39eb295a42f285f016ea698e2f198 (patch)
tree: 90ac364fe5f8fd0de837323c075785499363f672 /llvm/lib/Target
parent: 704817912a2d7ea3c95b5194480c4c62b789043d (diff)
download: bcm5719-llvm-1df0bb6264a39eb295a42f285f016ea698e2f198.tar.gz
          bcm5719-llvm-1df0bb6264a39eb295a42f285f016ea698e2f198.zip
[x86] improve AVX lowering of vector zext
If we know the 2 halves of an oversized zext-in-reg are the same, don't create those halves independently.

I tried several different approaches to fold this, but it's difficult to get right during legalization. In the default path, we are creating a generic shuffle that looks like an unpack high, but it can get transformed into a different mask (a blend), so it's not straightforward to match that. If we try to fold after it actually becomes an X86ISD::UNPCKH node, we can't be sure what the operand node is - it might be a generic shuffle, or it could be some x86-specific op.

From the test output, we should be doing something like this for SSE4.1 as well, but I'd rather leave that as a follow-up since it involves changing lowering actions.

Differential Revision: https://reviews.llvm.org/D59777

llvm-svn: 357129
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 20
1 files changed, 20 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 17a93274d83..ba168d35f0a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9883,6 +9883,20 @@ static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) {
return false;
}
+/// Return true if a shuffle mask chooses elements identically in its top and
+/// bottom halves. For example, any splat mask has the same top and bottom
+/// halves. If an element is undefined in only one half of the mask, the halves
+/// are not considered identical.
+static bool hasIdenticalHalvesShuffleMask(ArrayRef<int> Mask) {
+ assert(Mask.size() % 2 == 0 && "Expecting even number of elements in mask");
+ unsigned HalfSize = Mask.size() / 2;
+ for (unsigned i = 0; i != HalfSize; ++i) {
+ if (Mask[i] != Mask[i + HalfSize])
+ return false;
+ }
+ return true;
+}
+
/// Get a 4-lane 8-bit shuffle immediate for a mask.
///
/// This helper function produces an 8-bit shuffle immediate corresponding to
@@ -18369,6 +18383,12 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
SDValue OpLo = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, HalfVT, In);
+ // Short-circuit if we can determine that each 128-bit half is the same value.
+ // Otherwise, this is difficult to match and optimize.
+ if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(In))
+ if (hasIdenticalHalvesShuffleMask(Shuf->getMask()))
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpLo);
+
SDValue ZeroVec = DAG.getConstant(0, dl, InVT);
SDValue Undef = DAG.getUNDEF(InVT);
bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
OpenPOWER on IntegriCloud