summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@gmail.com> 2017-02-04 23:26:46 +0000
committer: Craig Topper <craig.topper@gmail.com> 2017-02-04 23:26:46 +0000
commit: 978fdb75a4a21d8209b51c08d4bcd16a43e0779d (patch)
tree: a6a814000bdfa2128586aa46643e62e30d6ba12b /llvm/lib/Target/X86/X86ISelLowering.cpp
parent: 3d95228dbef03144acc0906da65c5ead8175f1dc (diff)
download: bcm5719-llvm-978fdb75a4a21d8209b51c08d4bcd16a43e0779d.tar.gz
download: bcm5719-llvm-978fdb75a4a21d8209b51c08d4bcd16a43e0779d.zip
[X86] Add support for folding (insert_subvector vec1, (extract_subvector vec2, idx1), idx1) -> (blendi vec2, vec1).
llvm-svn: 294112
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 27
1 files changed, 25 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f51920f5e6e..2090d32d08c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34121,8 +34121,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
// We are still creating an INSERT_SUBVECTOR below with an undef node to
// extend the subvector to the size of the result vector. Make sure that
// we are not recursing on that node by checking for undef here.
- if (IdxVal == 0 && OpVT.is256BitVector() && SubVecVT.is128BitVector() &&
- !Vec.isUndef()) {
+ if (IdxVal == 0 && OpVT.is256BitVector() && !Vec.isUndef()) {
SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
DAG.getUNDEF(OpVT), SubVec, N->getOperand(2));
@@ -34144,6 +34143,30 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
return DAG.getBitcast(OpVT, Vec256);
}
+ // If we're inserting into the upper half of a 256-bit vector with a vector
+ // that was extracted from the upper half of a 256-bit vector, we should
+ // use a blend instead.
+ if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && OpVT.is256BitVector() &&
+ SubVec.getOperand(0).getSimpleValueType() == OpVT &&
+ Idx == SubVec.getOperand(1) && IdxVal == OpVT.getVectorNumElements()/2) {
+
+ // Integers must be cast to 32-bit because there is only vpblendd;
+ // vpblendw can't be used for this because it has a handicapped mask.
+ // If we don't have AVX2, then cast to float. Using a wrong domain blend
+ // is still more efficient than using the wrong domain vinsertf128 that
+ // will be created by InsertSubVector().
+ MVT CastVT = OpVT;
+ if (OpVT.isInteger())
+ CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
+
+ // The blend instruction, and therefore its mask, depend on the data type.
+ unsigned MaskVal = CastVT.getScalarSizeInBits() == 64 ? 0x0c : 0xf0;
+ SDValue Mask = DAG.getConstant(MaskVal, dl, MVT::i8);
+ Vec = DAG.getNode(X86ISD::BLENDI, dl, CastVT, DAG.getBitcast(CastVT, Vec),
+ DAG.getBitcast(CastVT, SubVec.getOperand(0)), Mask);
+ return DAG.getBitcast(OpVT, Vec);
+ }
+
// Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
// load:
// (insert_subvector (insert_subvector undef, (load16 addr), 0),
OpenPOWER on IntegriCloud