summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
diff options
context:
space:
mode:
authorRoland Froese <froese@ca.ibm.com>2019-02-11 17:29:14 +0000
committerRoland Froese <froese@ca.ibm.com>2019-02-11 17:29:14 +0000
commit732fe22454da7d7b4e331b8ab8ea104a37ba36eb (patch)
tree7258a7b70bd9bc3e286324e4549012670c7de823 /llvm/lib/Target/PowerPC/PPCISelLowering.cpp
parentebdb021031a92a569d457ff4d79a09b73752558d (diff)
downloadbcm5719-llvm-732fe22454da7d7b4e331b8ab8ea104a37ba36eb.tar.gz
bcm5719-llvm-732fe22454da7d7b4e331b8ab8ea104a37ba36eb.zip
[PowerPC] Avoid scalarization of vector truncate
The PowerPC code generator currently scalarizes vector truncates that would fit in a vector register, resulting in vector extracts, scalar operations, and vector merges. This patch custom lowers a vector truncate that would fit in a register to a vector shuffle instead. Differential Revision: https://reviews.llvm.org/D56507 llvm-svn: 353724
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp73
1 files changed, 73 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 6fbeb031f01..c271ed95dd6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -118,6 +118,8 @@ STATISTIC(NumSiblingCalls, "Number of sibling calls");
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
+static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
+
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
@@ -639,6 +641,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// with merges, splats, etc.
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+ // Vector truncates to sub-word integer that fit in an Altivec/VSX register
+ // are cheap, so handle them before they get expanded to scalar.
+ setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
+
setOperationAction(ISD::AND , MVT::v4i32, Legal);
setOperationAction(ISD::OR , MVT::v4i32, Legal);
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
@@ -6794,6 +6804,61 @@ SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
Op.getOperand(0));
}
+SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ // Implements a vector truncate that fits in a vector register as a shuffle.
+ // We want to legalize vector truncates down to where the source fits in
+ // a vector register (and target is therefore smaller than vector register
+ // size). At that point legalization will try to custom lower the sub-legal
+ // result and get here - where we can contain the truncate as a single target
+ // operation.
+
+ // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
+ // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
+ //
+ // We will implement it for big-endian ordering as this (where x denotes
+ // undefined):
+ // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
+ // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
+ //
+ // The same operation in little-endian ordering will be:
+ // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
+ // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
+
+ assert(Op.getValueType().isVector() && "Vector type expected.");
+
+ SDLoc DL(Op);
+ SDValue N1 = Op.getOperand(0);
+ unsigned SrcSize = N1.getValueType().getSizeInBits();
+ assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
+ SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
+
+ EVT TrgVT = Op.getValueType();
+ unsigned TrgNumElts = TrgVT.getVectorNumElements();
+ EVT EltVT = TrgVT.getVectorElementType();
+ unsigned WideNumElts = 128 / EltVT.getSizeInBits();
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
+
+ // First list the elements we want to keep.
+ unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
+ SmallVector<int, 16> ShuffV;
+ if (Subtarget.isLittleEndian())
+ for (unsigned i = 0; i < TrgNumElts; ++i)
+ ShuffV.push_back(i * SizeMult);
+ else
+ for (unsigned i = 1; i <= TrgNumElts; ++i)
+ ShuffV.push_back(i * SizeMult - 1);
+
+ // Populate the remaining elements with undefs.
+ for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
+ // ShuffV.push_back(i + WideNumElts);
+ ShuffV.push_back(WideNumElts + 1);
+
+ SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
+ return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
+}
+
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -9641,6 +9706,14 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
+ case ISD::TRUNCATE: {
+ EVT TrgVT = N->getValueType(0);
+ if (TrgVT.isVector() &&
+ isOperationCustom(N->getOpcode(), TrgVT) &&
+ N->getOperand(0).getValueType().getSizeInBits() <= 128)
+ Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
+ return;
+ }
case ISD::BITCAST:
// Don't handle bitcast here.
return;
OpenPOWER on IntegriCloud