diff options
| author | Mon P Wang <wangmp@apple.com> | 2010-01-24 00:05:03 +0000 | 
|---|---|---|
| committer | Mon P Wang <wangmp@apple.com> | 2010-01-24 00:05:03 +0000 | 
| commit | 586d997e98d7f2c2b02d50422da3f604062ab68b (patch) | |
| tree | 21f5032acc12d4ab910236470ee605554bd78460 /llvm/lib/CodeGen | |
| parent | e112ff64c5b80eb2b86ecf45522d27f0ae160318 (diff) | |
| download | bcm5719-llvm-586d997e98d7f2c2b02d50422da3f604062ab68b.tar.gz bcm5719-llvm-586d997e98d7f2c2b02d50422da3f604062ab68b.zip | |
Improved widening loads by adding support for wider loads if
the alignment allows.  Fixed a bug where we didn't use a
vector load/store for PR5626.
llvm-svn: 94338
Diffstat (limited to 'llvm/lib/CodeGen')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 54 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 602 | 
3 files changed, 385 insertions, 275 deletions
| diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 5e3f58a8af4..1c83869d774 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1533,10 +1533,10 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {      Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);      // If EltVT smaller than OpVT, only store the bits necessary. -    if (EltVT.bitsLT(OpVT)) +    if (!OpVT.isVector() && EltVT.bitsLT(OpVT)) {        Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,                            Node->getOperand(i), Idx, SV, Offset, EltVT)); -    else +    } else        Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,                                       Node->getOperand(i), Idx, SV, Offset));    } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index b5dbd41eb97..bf231d9e526 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -633,43 +633,33 @@ private:    // Vector Widening Utilities Support: LegalizeVectorTypes.cpp    //===--------------------------------------------------------------------===// -  /// Helper genWidenVectorLoads - Helper function to generate a set of +  /// Helper GenWidenVectorLoads - Helper function to generate a set of    /// loads to load a vector with a resulting wider type. It takes -  ///   ExtType: Extension type -  ///   LdChain: list of chains for the load we have generated. -  ///   Chain:   incoming chain for the ld vector. -  ///   BasePtr: base pointer to load from. -  ///   SV:         memory disambiguation source value. -  ///   SVOffset:   memory disambiugation offset. -  ///   Alignment:  alignment of the memory. -  ///   isVolatile: volatile load. -  ///   LdWidth:    width of memory that we want to load. -  ///   ResType:    the wider result result type for the resulting vector. -  ///   dl:         DebugLoc to be applied to new nodes -  SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain, -                              SDValue BasePtr, const Value *SV, -                              int SVOffset, unsigned Alignment, -                              bool isVolatile, unsigned LdWidth, -                              EVT ResType, DebugLoc dl); +  ///   LdChain: list of chains for the load to be generated. +  ///   Ld:      load to widen +  SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, +                              LoadSDNode *LD); + +  /// GenWidenVectorExtLoads - Helper function to generate a set of extension +  /// loads to load a ector with a resulting wider type.  It takes +  ///   LdChain: list of chains for the load to be generated. +  ///   Ld:      load to widen +  ///   ExtType: extension element type +  SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, +                                 LoadSDNode *LD, ISD::LoadExtType ExtType);    /// Helper genWidenVectorStores - Helper function to generate a set of    /// stores to store a widen vector into non widen memory -  /// It takes    ///   StChain: list of chains for the stores we have generated -  ///   Chain:   incoming chain for the ld vector -  ///   BasePtr: base pointer to load from -  ///   SV:      memory disambiguation source value -  ///   SVOffset:   memory disambiugation offset -  ///   Alignment:  alignment of the memory -  ///   isVolatile: volatile lod -  ///   ValOp:   value to store -  ///   StWidth: width of memory that we want to store -  ///   dl:         DebugLoc to be applied to new nodes -  void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue Chain, -                            SDValue BasePtr, const Value *SV, -                            int SVOffset, unsigned Alignment, -                            bool isVolatile, SDValue ValOp, -                            unsigned StWidth, DebugLoc dl); +  ///   ST:      store of a widen value +  void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST); + +  /// Helper genWidenVectorTruncStores - Helper function to generate a set of +  /// stores to store a truncate widen vector into non widen memory +  ///   StChain: list of chains for the stores we have generated +  ///   ST:      store of a widen value +  void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, +                                 StoreSDNode *ST);    /// Modifies a vector input (widen or narrows) to a vector of NVT.  The    /// input vector must have the same element type as NVT. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 808bac70fda..192cdeee835 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1655,68 +1655,24 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {  SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {    LoadSDNode *LD = cast<LoadSDNode>(N); -  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); -  EVT LdVT    = LD->getMemoryVT(); -  DebugLoc dl = N->getDebugLoc(); -  assert(LdVT.isVector() && WidenVT.isVector()); - -  // Load information -  SDValue   Chain = LD->getChain(); -  SDValue   BasePtr = LD->getBasePtr(); -  int       SVOffset = LD->getSrcValueOffset(); -  unsigned  Align    = LD->getAlignment(); -  bool      isVolatile = LD->isVolatile(); -  const Value *SV = LD->getSrcValue();    ISD::LoadExtType ExtType = LD->getExtensionType();    SDValue Result;    SmallVector<SDValue, 16> LdChain;  // Chain for the series of load -  if (ExtType != ISD::NON_EXTLOAD) { -    // For extension loads, we can not play the tricks of chopping legal -    // vector types and bit cast it to the right type.  Instead, we unroll -    // the load and build a vector. -    EVT EltVT = WidenVT.getVectorElementType(); -    EVT LdEltVT = LdVT.getVectorElementType(); -    unsigned NumElts = LdVT.getVectorNumElements(); - -    // Load each element and widen -    unsigned WidenNumElts = WidenVT.getVectorNumElements(); -    SmallVector<SDValue, 16> Ops(WidenNumElts); -    unsigned Increment = LdEltVT.getSizeInBits() / 8; -    Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, -                            LdEltVT, isVolatile, Align); -    LdChain.push_back(Ops[0].getValue(1)); -    unsigned i = 0, Offset = Increment; -    for (i=1; i < NumElts; ++i, Offset += Increment) { -      SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), -                                       BasePtr, DAG.getIntPtrConstant(Offset)); -      Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, -                              SVOffset + Offset, LdEltVT, isVolatile, Align); -      LdChain.push_back(Ops[i].getValue(1)); -    } - -    // Fill the rest with undefs -    SDValue UndefVal = DAG.getUNDEF(EltVT); -    for (; i != WidenNumElts; ++i) -      Ops[i] = UndefVal; - -    Result =  DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size()); -  } else { -    assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); -    unsigned int LdWidth = LdVT.getSizeInBits(); -    Result = GenWidenVectorLoads(LdChain, Chain, BasePtr, SV, SVOffset, -                                 Align, isVolatile, LdWidth, WidenVT, dl); -  } - - // If we generate a single load, we can use that for the chain.  Otherwise, - // build a factor node to remember the multiple loads are independent and - // chain to that. - SDValue NewChain; - if (LdChain.size() == 1) -   NewChain = LdChain[0]; - else -   NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LdChain[0], -                          LdChain.size()); +  if (ExtType != ISD::NON_EXTLOAD) +    Result = GenWidenVectorExtLoads(LdChain, LD, ExtType); +  else +    Result = GenWidenVectorLoads(LdChain, LD); + +  // If we generate a single load, we can use that for the chain.  Otherwise, +  // build a factor node to remember the multiple loads are independent and +  // chain to that. +  SDValue NewChain; +  if (LdChain.size() == 1) +    NewChain = LdChain[0]; +  else +    NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other, +                           &LdChain[0], LdChain.size());    // Modified the chain - switch anything that used the old chain to use    // the new one. @@ -1954,57 +1910,17 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {    // We have to widen the value but we want only to store the original    // vector type.    StoreSDNode *ST = cast<StoreSDNode>(N); -  SDValue  Chain = ST->getChain(); -  SDValue  BasePtr = ST->getBasePtr(); -  const    Value *SV = ST->getSrcValue(); -  int      SVOffset = ST->getSrcValueOffset(); -  unsigned Align = ST->getAlignment(); -  bool     isVolatile = ST->isVolatile(); -  SDValue  ValOp = GetWidenedVector(ST->getValue()); -  DebugLoc dl = N->getDebugLoc(); - -  EVT StVT = ST->getMemoryVT(); -  EVT ValVT = ValOp.getValueType(); -  // It must be true that we the widen vector type is bigger than where -  // we need to store. -  assert(StVT.isVector() && ValOp.getValueType().isVector()); -  assert(StVT.bitsLT(ValOp.getValueType()));    SmallVector<SDValue, 16> StChain; -  if (ST->isTruncatingStore()) { -    // For truncating stores, we can not play the tricks of chopping legal -    // vector types and bit cast it to the right type.  Instead, we unroll -    // the store. -    EVT StEltVT  = StVT.getVectorElementType(); -    EVT ValEltVT = ValVT.getVectorElementType(); -    unsigned Increment = ValEltVT.getSizeInBits() / 8; -    unsigned NumElts = StVT.getVectorNumElements(); -    SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, -                              DAG.getIntPtrConstant(0)); -    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV, -                                        SVOffset, StEltVT, -                                        isVolatile, Align)); -    unsigned Offset = Increment; -    for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { -      SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), -                                       BasePtr, DAG.getIntPtrConstant(Offset)); -      SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, -                              DAG.getIntPtrConstant(0)); -      StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV, -                                          SVOffset + Offset, StEltVT, -                                          isVolatile, MinAlign(Align, Offset))); -    } -  } -  else { -    assert(StVT.getVectorElementType() == ValVT.getVectorElementType()); -    // Store value -    GenWidenVectorStores(StChain, Chain, BasePtr, SV, SVOffset, -                         Align, isVolatile, ValOp, StVT.getSizeInBits(), dl); -  } +  if (ST->isTruncatingStore()) +    GenWidenVectorTruncStores(StChain, ST); +  else +    GenWidenVectorStores(StChain, ST); +    if (StChain.size() == 1)      return StChain[0];    else -    return DAG.getNode(ISD::TokenFactor, dl, +    return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(),                         MVT::Other,&StChain[0],StChain.size());  } @@ -2012,179 +1928,383 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {  // Vector Widening Utilities  //===----------------------------------------------------------------------===// +// Utility function to find the type to chop up a widen vector for load/store +//  TLI:       Target lowering used to determine legal types. +//  Width:     Width left need to load/store. +//  WidenVT:   The widen vector type to load to/store from +//  Align:     If 0, don't allow use of a wider type +//  WidenEx:   If Align is not 0, the amount additional we can load/store from. + +static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, +                       unsigned Width, EVT WidenVT, +                       unsigned Align = 0, unsigned WidenEx = 0) { +  EVT WidenEltVT = WidenVT.getVectorElementType(); +  unsigned WidenWidth = WidenVT.getSizeInBits(); +  unsigned WidenEltWidth = WidenEltVT.getSizeInBits(); +  unsigned AlignInBits = Align*8; + +  // If we have one element to load/store, return it. +  EVT RetVT = WidenEltVT; +  if (Width == WidenEltWidth) +    return RetVT; + +  // See if there is larger legal integer than the element type to load/store  +  unsigned VT; +  for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; +       VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { +    EVT MemVT((MVT::SimpleValueType) VT); +    unsigned MemVTWidth = MemVT.getSizeInBits(); +    if (MemVT.getSizeInBits() <= WidenEltWidth) +      break; +    if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && +        (MemVTWidth <= Width || +         (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { +      RetVT = MemVT; +      break; +    } +  } -// Utility function to find a vector type and its associated element -// type from a preferred width and whose vector type must be the same size -// as the VecVT. -//  TLI:   Target lowering used to determine legal types. -//  Width: Preferred width to store. -//  VecVT: Vector value type whose size we must match. -// Returns NewVecVT and NewEltVT - the vector type and its associated -// element type. -static void FindAssocWidenVecType(SelectionDAG& DAG, -                                  const TargetLowering &TLI, unsigned Width, -                                  EVT VecVT, -                                  EVT& NewEltVT, EVT& NewVecVT) { -  unsigned EltWidth = Width + 1; -  if (TLI.isTypeLegal(VecVT)) { -    // We start with the preferred with, making it a power of 2 and find a -    // legal vector type of that width.  If not, we reduce it by another of 2. -    // For incoming type is legal, this process will end as a vector of the -    // smallest loadable type should always be legal. -    do { -      assert(EltWidth > 0); -      EltWidth = 1 << Log2_32(EltWidth - 1); -      NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); -      unsigned NumElts = VecVT.getSizeInBits() / EltWidth; -      NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts); -    } while (!TLI.isTypeLegal(NewVecVT) || -             VecVT.getSizeInBits() != NewVecVT.getSizeInBits()); -  } else { -    // The incoming vector type is illegal and is the result of widening -    // a vector to a power of 2. In this case, we will use the preferred -    // with as long as it is a multiple of the incoming vector length. -    // The legalization process will eventually make this into a legal type -    // and remove the illegal bit converts (which would turn to stack converts -    // if they are allow to exist). -     do { -      assert(EltWidth > 0); -      EltWidth = 1 << Log2_32(EltWidth - 1); -      NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); -      unsigned NumElts = VecVT.getSizeInBits() / EltWidth; -      NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts); -    } while (!TLI.isTypeLegal(NewEltVT) || -             VecVT.getSizeInBits() != NewVecVT.getSizeInBits()); +  // See if there is a larger vector type to load/store that has the same vector +  // element type and is evenly divisible with the WidenVT. +  for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; +       VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { +    EVT MemVT = (MVT::SimpleValueType) VT; +    unsigned MemVTWidth = MemVT.getSizeInBits(); +    if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && +        (WidenWidth % MemVTWidth) == 0 && +        (MemVTWidth <= Width || +         (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { +      if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) +        return MemVT; +    }    } + +  return RetVT; +} + +// Builds a vector type from scalar loads +//  VecTy: Resulting Vector type +//  LDOps: Load operators to build a vector type +//  [Start,End) the list of loads to use. +static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, +                                     SmallVector<SDValue, 16>& LdOps, +                                     unsigned Start, unsigned End) { +  DebugLoc dl = LdOps[Start].getDebugLoc(); +  EVT LdTy = LdOps[Start].getValueType(); +  unsigned Width = VecTy.getSizeInBits(); +  unsigned NumElts = Width / LdTy.getSizeInBits(); +  EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts); + +  unsigned Idx = 1; +  SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]); + +  for (unsigned i = Start + 1; i != End; ++i) { +    EVT NewLdTy = LdOps[i].getValueType(); +    if (NewLdTy != LdTy) { +      NumElts = Width / NewLdTy.getSizeInBits(); +      NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); +      VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); +      // Readjust position and vector position based on new load type +      Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); +      LdTy = NewLdTy; +    } +    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], +                        DAG.getIntPtrConstant(Idx++)); +  } +  return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp);  }  SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, -                                              SDValue      Chain, -                                              SDValue      BasePtr, -                                              const Value *SV, -                                              int          SVOffset, -                                              unsigned     Alignment, -                                              bool         isVolatile, -                                              unsigned     LdWidth, -                                              EVT          ResType, -                                              DebugLoc     dl) { +                                              LoadSDNode * LD) {    // The strategy assumes that we can efficiently load powers of two widths. -  // The routines chops the vector into the largest power of 2 load and -  // can be inserted into a legal vector and then cast the result into the -  // vector type we want.  This avoids unnecessary stack converts. +  // The routines chops the vector into the largest vector loads with the same +  // element type or scalar loads and then recombines it to the widen vector +  // type. +  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); +  unsigned WidenWidth = WidenVT.getSizeInBits(); +  EVT LdVT    = LD->getMemoryVT(); +  DebugLoc dl = LD->getDebugLoc(); +  assert(LdVT.isVector() && WidenVT.isVector()); +  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); + +  // Load information +  SDValue   Chain = LD->getChain(); +  SDValue   BasePtr = LD->getBasePtr(); +  int       SVOffset = LD->getSrcValueOffset(); +  unsigned  Align    = LD->getAlignment(); +  bool      isVolatile = LD->isVolatile(); +  const Value *SV = LD->getSrcValue(); -  // TODO: If the Ldwidth is legal, alignment is the same as the LdWidth, and -  //       the load is nonvolatile, we an use a wider load for the value. +  int LdWidth = LdVT.getSizeInBits(); +  int WidthDiff = WidenWidth - LdWidth;          // Difference +  unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads    // Find the vector type that can load from. -  EVT NewEltVT, NewVecVT; -  unsigned NewEltVTWidth; -  FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT); -  NewEltVTWidth = NewEltVT.getSizeInBits(); - -  SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset, -                             isVolatile, Alignment); -  SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +  EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); +  int NewVTWidth = NewVT.getSizeInBits(); +  SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset, +                             isVolatile, Align);    LdChain.push_back(LdOp.getValue(1));    // Check if we can load the element with one instruction -  if (LdWidth == NewEltVTWidth) { -    return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp); +  if (LdWidth <= NewVTWidth) { +    if (NewVT.isVector()) { +      if (NewVT != WidenVT) { +        assert(WidenWidth % NewVTWidth == 0); +        unsigned NumConcat = WidenWidth / NewVTWidth; +        SmallVector<SDValue, 16> ConcatOps(NumConcat); +        SDValue UndefVal = DAG.getUNDEF(NewVT); +        ConcatOps[0] = LdOp; +        for (unsigned i = 1; i != NumConcat; ++i) +          ConcatOps[i] = UndefVal; +        return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], +                           NumConcat); +      } else +        return LdOp; +    } else { +      unsigned NumElts = WidenWidth / LdWidth; +      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); +      SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp); +    }    } -  unsigned Idx = 1; -  LdWidth -= NewEltVTWidth; +  // Load vector by using multiple loads from largest vector to scalar +  SmallVector<SDValue, 16> LdOps; +  LdOps.push_back(LdOp); + +  LdWidth -= NewVTWidth;    unsigned Offset = 0;    while (LdWidth > 0) { -    unsigned Increment = NewEltVTWidth / 8; +    unsigned Increment = NewVTWidth / 8;      Offset += Increment;      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,                            DAG.getIntPtrConstant(Increment)); -    if (LdWidth < NewEltVTWidth) { -      // Our current type we are using is too large, use a smaller size by -      // using a smaller power of 2 -      unsigned oNewEltVTWidth = NewEltVTWidth; -      FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT); -      NewEltVTWidth = NewEltVT.getSizeInBits(); -      // Readjust position and vector position based on new load type -      Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); -      VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); +    if (LdWidth < NewVTWidth) { +      // Our current type we are using is too large, find a better size +      NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); +      NewVTWidth = NewVT.getSizeInBits();      } -    SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, -                                 SVOffset+Offset, isVolatile, -                                 MinAlign(Alignment, Offset)); +    SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, +                               SVOffset+Offset, isVolatile, +                               MinAlign(Align, Increment));      LdChain.push_back(LdOp.getValue(1)); -    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOp, -                        DAG.getIntPtrConstant(Idx++)); +    LdOps.push_back(LdOp); -    LdWidth -= NewEltVTWidth; +    LdWidth -= NewVTWidth;    } -  return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp); -} +  // Build the vector from the loads operations +  unsigned End = LdOps.size(); +  if (LdOps[0].getValueType().isVector()) { +    // If the load contains vectors, build the vector using concat vector. +    // All of the vectors used to loads are power of 2 and the scalars load +    // can be combined to make a power of 2 vector. +    SmallVector<SDValue, 16> ConcatOps(End); +    int i = End - 1; +    int Idx = End; +    EVT LdTy = LdOps[i].getValueType(); +    // First combine the scalar loads to a vector +    if (!LdTy.isVector())  { +      for (--i; i >= 0; --i) { +        LdTy = LdOps[i].getValueType(); +        if (LdTy.isVector()) +          break; +      } +      ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End); +    } +    ConcatOps[--Idx] = LdOps[i]; +    for (--i; i >= 0; --i) { +      EVT NewLdTy = LdOps[i].getValueType(); +      if (NewLdTy != LdTy) { +        // Create a larger vector +        ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, +                                       &ConcatOps[Idx], End - Idx); +        Idx = End - 1; +        LdTy = NewLdTy; +      } +      ConcatOps[--Idx] = LdOps[i]; +    } -void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, -                                            SDValue   Chain, -                                            SDValue   BasePtr, -                                            const Value *SV, -                                            int         SVOffset, -                                            unsigned    Alignment, -                                            bool        isVolatile, -                                            SDValue     ValOp, -                                            unsigned    StWidth, -                                            DebugLoc    dl) { -  // Breaks the stores into a series of power of 2 width stores.  For any -  // width, we convert the vector to the vector of element size that we -  // want to store.  This avoids requiring a stack convert. - -  // Find a width of the element type we can store with -  EVT WidenVT = ValOp.getValueType(); -  EVT NewEltVT, NewVecVT; - -  FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT); -  unsigned NewEltVTWidth = NewEltVT.getSizeInBits(); - -  SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); -  SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp, -                            DAG.getIntPtrConstant(0)); -  SDValue StOp = DAG.getStore(Chain, dl, EOp, BasePtr, SV, SVOffset, -                               isVolatile, Alignment); -  StChain.push_back(StOp); +    if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) { +      // We need to fill the rest with undefs to build the vector +      unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); +      SmallVector<SDValue, 16> WidenOps(NumOps); +      SDValue UndefVal = DAG.getUNDEF(LdTy); +      unsigned i = 0; +      for (; i != End-Idx; ++i) +        WidenOps[i] = ConcatOps[Idx+i]; +      for (; i != NumOps; ++i) +        WidenOps[i] = UndefVal; +      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps); +    } else +      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, +                         &ConcatOps[Idx], End - Idx); +  } else // All the loads are scalar loads. +    return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); +} + +SDValue +DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, +                                         LoadSDNode * LD, +                                         ISD::LoadExtType ExtType) { +  // For extension loads, it may not be more efficient to chop up the vector +  // and then extended it.  Instead, we unroll the load and build a new vector. +  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); +  EVT LdVT    = LD->getMemoryVT(); +  DebugLoc dl = LD->getDebugLoc(); +  assert(LdVT.isVector() && WidenVT.isVector()); -  // Check if we are done -  if (StWidth == NewEltVTWidth) { -    return; +  // Load information +  SDValue   Chain = LD->getChain(); +  SDValue   BasePtr = LD->getBasePtr(); +  int       SVOffset = LD->getSrcValueOffset(); +  unsigned  Align    = LD->getAlignment(); +  bool      isVolatile = LD->isVolatile(); +  const Value *SV = LD->getSrcValue(); + +  EVT EltVT = WidenVT.getVectorElementType(); +  EVT LdEltVT = LdVT.getVectorElementType(); +  unsigned NumElts = LdVT.getVectorNumElements(); + +  // Load each element and widen +  unsigned WidenNumElts = WidenVT.getVectorNumElements(); +  SmallVector<SDValue, 16> Ops(WidenNumElts); +  unsigned Increment = LdEltVT.getSizeInBits() / 8; +  Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, +                          LdEltVT, isVolatile, Align); +  LdChain.push_back(Ops[0].getValue(1)); +  unsigned i = 0, Offset = Increment; +  for (i=1; i < NumElts; ++i, Offset += Increment) { +    SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), +                                     BasePtr, DAG.getIntPtrConstant(Offset)); +    Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, +                            SVOffset + Offset, LdEltVT, isVolatile, Align); +    LdChain.push_back(Ops[i].getValue(1));    } -  unsigned Idx = 1; -  StWidth -= NewEltVTWidth; -  unsigned Offset = 0; +  // Fill the rest with undefs +  SDValue UndefVal = DAG.getUNDEF(EltVT); +  for (; i != WidenNumElts; ++i) +    Ops[i] = UndefVal; -  while (StWidth > 0) { -    unsigned Increment = NewEltVTWidth / 8; -    Offset += Increment; -    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, -                          DAG.getIntPtrConstant(Increment)); +  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size()); +} -    if (StWidth < NewEltVTWidth) { -      // Our current type we are using is too large, use a smaller size by -      // using a smaller power of 2 -      unsigned oNewEltVTWidth = NewEltVTWidth; -      FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT); -      NewEltVTWidth = NewEltVT.getSizeInBits(); -      // Readjust position and vector position based on new load type -      Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); -      VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); -    } -    EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp, +void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, +                                            StoreSDNode *ST) { +  // The strategy assumes that we can efficiently store powers of two widths. +  // The routines chops the vector into the largest vector stores with the same +  // element type or scalar stores. +  SDValue  Chain = ST->getChain(); +  SDValue  BasePtr = ST->getBasePtr(); +  const    Value *SV = ST->getSrcValue(); +  int      SVOffset = ST->getSrcValueOffset(); +  unsigned Align = ST->getAlignment(); +  bool     isVolatile = ST->isVolatile(); +  SDValue  ValOp = GetWidenedVector(ST->getValue()); +  DebugLoc dl = ST->getDebugLoc(); + +  EVT StVT = ST->getMemoryVT(); +  unsigned StWidth = StVT.getSizeInBits(); +  EVT ValVT = ValOp.getValueType(); +  unsigned ValWidth = ValVT.getSizeInBits(); +  EVT ValEltVT = ValVT.getVectorElementType(); +  unsigned ValEltWidth = ValEltVT.getSizeInBits(); +  assert(StVT.getVectorElementType() == ValEltVT); + +  int Idx = 0;          // current index to store +  unsigned Offset = 0;  // offset from base to store +  while (StWidth != 0) { +    // Find the largest vector type we can store with +    EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT); +    unsigned NewVTWidth = NewVT.getSizeInBits(); +    unsigned Increment = NewVTWidth / 8; +    if (NewVT.isVector()) { +      unsigned NumVTElts = NewVT.getVectorNumElements(); +      do { +        SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, +                                   DAG.getIntPtrConstant(Idx)); +        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, +                                       SVOffset + Offset, isVolatile, +                                       MinAlign(Align, Offset))); +        StWidth -= NewVTWidth; +        Offset += Increment; +        Idx += NumVTElts; +        BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, +                              DAG.getIntPtrConstant(Increment)); +      } while (StWidth != 0 && StWidth >= NewVTWidth); +    } else { +      // Cast the vector to the scalar type we can store +      unsigned NumElts = ValWidth / NewVTWidth; +      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); +      SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); +      // Readjust index position based on new vector type +      Idx = Idx * ValEltWidth / NewVTWidth; +      do { +        SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,                        DAG.getIntPtrConstant(Idx++)); -    StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, +        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,                                     SVOffset + Offset, isVolatile, -                                   MinAlign(Alignment, Offset))); -    StWidth -= NewEltVTWidth; +                                   MinAlign(Align, Offset))); +        StWidth -= NewVTWidth; +        Offset += Increment; +        BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, +                              DAG.getIntPtrConstant(Increment)); +      } while (StWidth != 0  && StWidth >= NewVTWidth); +      // Restore index back to be relative to the original widen element type +      Idx = Idx * NewVTWidth / ValEltWidth; +    } +  } +} + +void +DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, +                                            StoreSDNode *ST) { +  // For extension loads, it may not be more efficient to truncate the vector +  // and then store it.  Instead, we extract each element and then store it. +  SDValue  Chain = ST->getChain(); +  SDValue  BasePtr = ST->getBasePtr(); +  const    Value *SV = ST->getSrcValue(); +  int      SVOffset = ST->getSrcValueOffset(); +  unsigned Align = ST->getAlignment(); +  bool     isVolatile = ST->isVolatile(); +  SDValue  ValOp = GetWidenedVector(ST->getValue()); +  DebugLoc dl = ST->getDebugLoc(); +   +  EVT StVT = ST->getMemoryVT(); +  EVT ValVT = ValOp.getValueType(); + +  // It must be true that we the widen vector type is bigger than where +  // we need to store. +  assert(StVT.isVector() && ValOp.getValueType().isVector()); +  assert(StVT.bitsLT(ValOp.getValueType())); + +  // For truncating stores, we can not play the tricks of chopping legal +  // vector types and bit cast it to the right type.  Instead, we unroll +  // the store. +  EVT StEltVT  = StVT.getVectorElementType(); +  EVT ValEltVT = ValVT.getVectorElementType(); +  unsigned Increment = ValEltVT.getSizeInBits() / 8; +  unsigned NumElts = StVT.getVectorNumElements(); +  SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, +                            DAG.getIntPtrConstant(0)); +  StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV, +                                      SVOffset, StEltVT, +                                      isVolatile, Align)); +  unsigned Offset = Increment; +  for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { +    SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), +                                     BasePtr, DAG.getIntPtrConstant(Offset)); +    SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, +                            DAG.getIntPtrConstant(0)); +    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV, +                                        SVOffset + Offset, StEltVT, +                                        isVolatile, MinAlign(Align, Offset)));    }  } | 

