diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2018-12-21 18:48:32 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2018-12-21 18:48:32 +0000 |
| commit | 80187b8a17ed8feedddf809ecc1cedd60dd0f72d (patch) | |
| tree | 23daab6009251ea7775076a89c6c6608a9c2b4ef /llvm/lib/Target | |
| parent | 8c9f865e3df3af1e27a6f5215c78f4bc42b60d06 (diff) | |
| download | bcm5719-llvm-80187b8a17ed8feedddf809ecc1cedd60dd0f72d.tar.gz bcm5719-llvm-80187b8a17ed8feedddf809ecc1cedd60dd0f72d.zip | |
[x86] add movddup specialization for build vector lowering (PR37502)
This is admittedly a narrow fix for the problem:
https://bugs.llvm.org/show_bug.cgi?id=37502
...but as the XOP restriction shows, it's a maze to get this right.
In the motivating example, note that we have movddup before SSE4.1 and
again with AVX2. That's because insertps isn't available pre-SSE41 and
vbroadcast is (more generally) available with AVX2 (and the splat is
reduced to movddup via isel pattern).
Differential Revision: https://reviews.llvm.org/D55898
llvm-svn: 349937
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 78197191089..43a56f96481 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6951,6 +6951,26 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, /// Custom lower build_vector of v4i32 or v4f32. static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { + // If this is a splat of a pair of elements, use MOVDDUP (unless the target + // has XOP; in that case defer lowering to potentially use VPERMIL2PS). + // Because we're creating a less complicated build vector here, we may enable + // further folding of the MOVDDUP via shuffle transforms. + if (Subtarget.hasSSE3() && !Subtarget.hasXOP() && + Op.getOperand(0) == Op.getOperand(2) && + Op.getOperand(1) == Op.getOperand(3) && + Op.getOperand(0) != Op.getOperand(1)) { + SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + MVT EltVT = VT.getVectorElementType(); + // Create a new build vector with the first 2 elements followed by undef + // padding, bitcast to v2f64, duplicate, and bitcast back. + SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1), + DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) }; + SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops)); + SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV); + return DAG.getBitcast(VT, Dup); + } + // Find all zeroable elements. std::bitset<4> Zeroable; for (int i=0; i < 4; ++i) { |

