summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2018-12-21 18:48:32 +0000
committerSanjay Patel <spatel@rotateright.com>2018-12-21 18:48:32 +0000
commit80187b8a17ed8feedddf809ecc1cedd60dd0f72d (patch)
tree23daab6009251ea7775076a89c6c6608a9c2b4ef /llvm/lib/Target
parent8c9f865e3df3af1e27a6f5215c78f4bc42b60d06 (diff)
downloadbcm5719-llvm-80187b8a17ed8feedddf809ecc1cedd60dd0f72d.tar.gz
bcm5719-llvm-80187b8a17ed8feedddf809ecc1cedd60dd0f72d.zip
[x86] add movddup specialization for build vector lowering (PR37502)
This is admittedly a narrow fix for the problem: https://bugs.llvm.org/show_bug.cgi?id=37502 ...but as the XOP restriction shows, it's a maze to get this right. In the motivating example, note that we have movddup before SSE4.1 and again with AVX2. That's because insertps isn't available pre-SSE41 and vbroadcast is (more generally) available with AVX2 (and the splat is reduced to movddup via isel pattern). Differential Revision: https://reviews.llvm.org/D55898 llvm-svn: 349937
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp20
1 files changed, 20 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 78197191089..43a56f96481 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6951,6 +6951,26 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
/// Custom lower build_vector of v4i32 or v4f32.
static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
+ // If this is a splat of a pair of elements, use MOVDDUP (unless the target
+ // has XOP; in that case defer lowering to potentially use VPERMIL2PS).
+ // Because we're creating a less complicated build vector here, we may enable
+ // further folding of the MOVDDUP via shuffle transforms.
+ if (Subtarget.hasSSE3() && !Subtarget.hasXOP() &&
+ Op.getOperand(0) == Op.getOperand(2) &&
+ Op.getOperand(1) == Op.getOperand(3) &&
+ Op.getOperand(0) != Op.getOperand(1)) {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ // Create a new build vector with the first 2 elements followed by undef
+ // padding, bitcast to v2f64, duplicate, and bitcast back.
+ SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
+ DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) };
+ SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops));
+ SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV);
+ return DAG.getBitcast(VT, Dup);
+ }
+
// Find all zeroable elements.
std::bitset<4> Zeroable;
for (int i=0; i < 4; ++i) {
OpenPOWER on IntegriCloud