summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-11-02 21:09:49 +0000
committerCraig Topper <craig.topper@intel.com>2018-11-02 21:09:49 +0000
commit60c202a4941141c4f211af507e3f5bc44fc63230 (patch)
tree5790506dc1d9890e619a117827684cc63455047c /llvm/lib
parent3095874d3c100261de79b4105ebd4a616332df46 (diff)
downloadbcm5719-llvm-60c202a4941141c4f211af507e3f5bc44fc63230.tar.gz
bcm5719-llvm-60c202a4941141c4f211af507e3f5bc44fc63230.zip
[X86] Don't emit *_extend_vector_inreg nodes when both the input and output types are legal with AVX1
We already have custom lowering for the AVX case in LegalizeVectorOps. So it's better to keep the regular extend op around as long as possible. I had to qualify one place in DAG combine that created illegal vector extending load operations. This change by itself had no effect on any tests which is why it's included here. I've made a few cleanups to the custom lowering. The sign extend code no longer creates an identity shuffle with undef elements. The zero extend code now emits a zero_extend_vector_inreg instead of an unpckl with a zero vector. For the high half of the custom lowering of zero_extend/any_extend, we're now using an unpckh with a zero vector or undef. Previously we used a pshufd to move the upper 64-bits to the lower 64-bits and then used a zero_extend_vector_inreg. I think the zero vector should require less execution resources and be smaller code size. Differential Revision: https://reviews.llvm.org/D54024 llvm-svn: 346043
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp2
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp45
2 files changed, 19 insertions, 28 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8c2f9e8d1f4..f318b7fdb39 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8391,7 +8391,7 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
- if ((LegalOperations || LN0->isVolatile()) &&
+ if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
return {};
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d95f72035e0..57e4cba9078 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -17446,27 +17446,26 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
// Optimize vectors in AVX mode:
//
// v8i16 -> v8i32
- // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
+ // Use vpmovzwd for 4 lower elements v8i16 -> v4i32.
// Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
// Concat upper and lower parts.
//
// v4i32 -> v4i64
- // Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
+ // Use vpmovzdq for 4 lower elements v4i32 -> v2i64.
// Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
// Concat upper and lower parts.
//
- SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
+
+ SDValue OpLo = DAG.getZeroExtendVectorInReg(In, dl, HalfVT);
+
+ SDValue ZeroVec = DAG.getConstant(0, dl, InVT);
SDValue Undef = DAG.getUNDEF(InVT);
bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
- SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
-
- MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements()/2);
-
- OpLo = DAG.getBitcast(HVT, OpLo);
- OpHi = DAG.getBitcast(HVT, OpHi);
+ OpHi = DAG.getBitcast(HalfVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
@@ -19878,29 +19877,21 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
// v4i32 to v4i64
//
// Divide input vector into two parts
- // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
+ // for v4i32 the high shuffle mask will be {2, 3, -1, -1}
// use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
// concat the vectors to original VT
- unsigned NumElems = InVT.getVectorNumElements();
- SDValue Undef = DAG.getUNDEF(InVT);
-
- SmallVector<int,8> ShufMask1(NumElems, -1);
- for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask1[i] = i;
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
- SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask1);
+ SDValue OpLo = DAG.getSignExtendVectorInReg(In, dl, HalfVT);
- SmallVector<int,8> ShufMask2(NumElems, -1);
+ unsigned NumElems = InVT.getVectorNumElements();
+ SmallVector<int,8> ShufMask(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
- ShufMask2[i] = i + NumElems/2;
-
- SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask2);
-
- MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() / 2);
+ ShufMask[i] = i + NumElems/2;
- OpLo = DAG.getSignExtendVectorInReg(OpLo, dl, HalfVT);
+ SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
OpHi = DAG.getSignExtendVectorInReg(OpHi, dl, HalfVT);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
@@ -38323,7 +38314,7 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
// On AVX2+ targets, if the input/output types are both legal then we will be
// able to use SIGN_EXTEND/ZERO_EXTEND directly.
- if (Subtarget.hasInt256() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ if (DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
DAG.getTargetLoweringInfo().isTypeLegal(InVT))
return SDValue();
OpenPOWER on IntegriCloud