[x86,SDAG] Introduce any- and sign-extend-vector-inreg nodes analogous

to the zero-extend-vector-inreg node introduced previously for the same purpose: manage the type legalization of widened extend operations, especially to support the experimental widening mode for x86. I'm adding both because sign-extend is expanded in terms of any-extend with shifts to propagate the sign bit. This removes the last fundamental scalarization from vec_cast2.ll (a test case that hit many really bad edge cases for widening legalization), although the trunc tests in that file still appear scalarized because the the shuffle legalization is scalarizing. Funny thing, I've been working on that. Some initial experiments with this and SSE2 scenarios is showing moderately good behavior already for sign extension. Still some work to do on the shuffle combining on X86 before we're generating optimal sequences, but avoiding scalarization is a huge step forward. llvm-svn: 212714
author: Chandler Carruth <chandlerc@gmail.com> 2014-07-10 12:32:32 +0000
committer: Chandler Carruth <chandlerc@gmail.com> 2014-07-10 12:32:32 +0000
commit: 0b666e0648ab76ffebd2df691040dbc6408ec94b (patch)
tree: 947b840ae90f6d96cc686895a33e2913ae77e1ce /llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
parent: 73c532e8a916e0c281c8520c6845183d454add8f (diff)
download: bcm5719-llvm-0b666e0648ab76ffebd2df691040dbc6408ec94b.tar.gz
bcm5719-llvm-0b666e0648ab76ffebd2df691040dbc6408ec94b.zip
1 files changed, 66 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 25dc98e0eb9..507e7ffb1d4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -75,6 +75,20 @@ class VectorLegalizer {
   /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
   SDValue ExpandSEXTINREG(SDValue Op);
 
+  /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG.
+  ///
+  /// Shuffles the low lanes of the operand into place and bitcasts to the proper
+  /// type. The contents of the bits in the extended part of each element are
+  /// undef.
+  SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
+
+  /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG.
+  ///
+  /// Shuffles the low lanes of the operand into place, bitcasts to the proper
+  /// type, then shifts left and arithmetic shifts right to introduce a sign
+  /// extension.
+  SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
+
   /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG.
   ///
   /// Shuffles the low lanes of the operand into place and blends zeros into
@@ -280,6 +294,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::FP_EXTEND:
   case ISD::FMA:
   case ISD::SIGN_EXTEND_INREG:
+  case ISD::ANY_EXTEND_VECTOR_INREG:
+  case ISD::SIGN_EXTEND_VECTOR_INREG:
   case ISD::ZERO_EXTEND_VECTOR_INREG:
     QueryType = Node->getValueType(0);
     break;
@@ -621,6 +637,10 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
   switch (Op->getOpcode()) {
   case ISD::SIGN_EXTEND_INREG:
     return ExpandSEXTINREG(Op);
+  case ISD::ANY_EXTEND_VECTOR_INREG:
+    return ExpandANY_EXTEND_VECTOR_INREG(Op);
+  case ISD::SIGN_EXTEND_VECTOR_INREG:
+    return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
   case ISD::ZERO_EXTEND_VECTOR_INREG:
     return ExpandZERO_EXTEND_VECTOR_INREG(Op);
   case ISD::BSWAP:
@@ -717,6 +737,52 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
   return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
 }
 
+// Generically expand a vector anyext in register to a shuffle of the relevant
+// lanes into the appropriate locations, with other lanes left undef.
+SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  int NumElements = VT.getVectorNumElements();
+  SDValue Src = Op.getOperand(0);
+  EVT SrcVT = Src.getValueType();
+  int NumSrcElements = SrcVT.getVectorNumElements();
+
+  // Build a base mask of undef shuffles.
+  SmallVector<int, 16> ShuffleMask;
+  ShuffleMask.resize(NumSrcElements, -1);
+
+  // Place the extended lanes into the correct locations.
+  int ExtLaneScale = NumSrcElements / NumElements;
+  int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0;
+  for (int i = 0; i < NumElements; ++i)
+    ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
+
+  return DAG.getNode(
+      ISD::BITCAST, DL, VT,
+      DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
+}
+
+SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  SDValue Src = Op.getOperand(0);
+  EVT SrcVT = Src.getValueType();
+
+  // First build an any-extend node which can be legalized above when we
+  // recurse through it.
+  Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
+
+  // Now we need sign extend. Do this by shifting the elements. Even if these
+  // aren't legal operations, they have a better chance of being legalized
+  // without full scalarization than the sign extension does.
+  unsigned EltWidth = VT.getVectorElementType().getSizeInBits();
+  unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits();
+  SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, VT);
+  return DAG.getNode(ISD::SRA, DL, VT,
+                     DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
+                     ShiftAmount);
+}
+
 // Generically expand a vector zext in register to a shuffle of the relevant
 // lanes into the appropriate locations, a blend of zero into the high bits,
 // and a bitcast to the wider element type.
author	Chandler Carruth <chandlerc@gmail.com>	2014-07-10 12:32:32 +0000
committer	Chandler Carruth <chandlerc@gmail.com>	2014-07-10 12:32:32 +0000
commit	0b666e0648ab76ffebd2df691040dbc6408ec94b (patch)
tree	947b840ae90f6d96cc686895a33e2913ae77e1ce /llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
parent	73c532e8a916e0c281c8520c6845183d454add8f (diff)
download	bcm5719-llvm-0b666e0648ab76ffebd2df691040dbc6408ec94b.tar.gz bcm5719-llvm-0b666e0648ab76ffebd2df691040dbc6408ec94b.zip