diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 32 | 
1 file changed, 14 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5303d7a406a..dcc6ab2620d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7825,24 +7825,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     }

     // Next, we iteratively mix elements, e.g. for v4f32:
-    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
-    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
-    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
-    unsigned EltStride = NumElems >> 1;
-    while (EltStride != 0) {
-      for (unsigned i = 0; i < EltStride; ++i) {
-        // If Ops[i+EltStride] is undef and this is the first round of mixing,
-        // then it is safe to just drop this shuffle: V[i] is already in the
-        // right place, the one element (since it's the first round) being
-        // inserted as undef can be dropped.  This isn't safe for successive
-        // rounds because they will permute elements within both vectors.
-        if (Ops[i+EltStride].isUndef() &&
-            EltStride == NumElems/2)
-          continue;
-
-        Ops[i] = getUnpackl(DAG, dl, VT, Ops[i], Ops[i + EltStride]);
-      }
-      EltStride >>= 1;
+    //   Step 1: unpcklps 0, 1 ==> X: <?, ?, 1, 0>
+    //         : unpcklps 2, 3 ==> Y: <?, ?, 3, 2>
+    //   Step 2: unpcklpd X, Y ==>    <3, 2, 1, 0>
+    for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) {
+      // Generate scaled UNPCKL shuffle mask.
+      SmallVector<int, 16> Mask;
+      for(unsigned i = 0; i != Scale; ++i)
+        Mask.push_back(i);
+      for (unsigned i = 0; i != Scale; ++i)
+        Mask.push_back(NumElems+i);
+      Mask.append(NumElems - Mask.size(), SM_SentinelUndef);
+
+      for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i)
+        Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask);
     }
     return Ops[0];
   }

