diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2018-10-04 16:25:05 +0000 | 
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2018-10-04 16:25:05 +0000 | 
| commit | 3746e11abedb8dbe017a6dc038944bf35ab5bd29 (patch) | |
| tree | 82d996de8fd3f7b697b758723db28abe90619cbf /llvm/lib | |
| parent | a4c17dd7f38a6c9ef2199d9ee5384c0af37a2d9e (diff) | |
| download | bcm5719-llvm-3746e11abedb8dbe017a6dc038944bf35ab5bd29.tar.gz bcm5719-llvm-3746e11abedb8dbe017a6dc038944bf35ab5bd29.zip | |
[InstCombine] allow bitcast to/from FP for vector insert/extract transform
This is a follow-up to rL343482 / D52439.
The pattern handled here initially caused that commit to be reverted because
the transform requires a bitcast to/from FP, as shown in this patch.
llvm-svn: 343794
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 35 | 
1 files changed, 31 insertions, 4 deletions
| diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 945664de686..f01f2b0eddd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -189,9 +189,7 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,    // If the source elements are wider than the destination, try to shift and    // truncate a subset of scalar bits of an insert op. -  // TODO: This is limited to integer types, but we could bitcast to/from FP. -  if (NumSrcElts < NumElts && SrcTy->getScalarType()->isIntegerTy() && -      DestTy->getScalarType()->isIntegerTy()) { +  if (NumSrcElts < NumElts) {      Value *Scalar;      uint64_t InsIndexC;      if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar), @@ -220,13 +218,42 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,      unsigned Chunk = ExtIndexC % NarrowingRatio;      if (IsBigEndian)        Chunk = NarrowingRatio - 1 - Chunk; -    unsigned ShAmt = Chunk * DestTy->getPrimitiveSizeInBits(); + +    // Bail out if this is an FP vector to FP vector sequence. That would take +    // more instructions than we started with unless there is no shift, and it +    // may not be handled as well in the backend. +    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy(); +    bool NeedDestBitcast = DestTy->isFloatingPointTy(); +    if (NeedSrcBitcast && NeedDestBitcast) +      return nullptr; + +    unsigned SrcWidth = SrcTy->getScalarSizeInBits(); +    unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); +    unsigned ShAmt = Chunk * DestWidth; + +    // TODO: This limitation is more strict than necessary. We could sum the +    // number of new instructions and subtract the number eliminated to know if +    // we can proceed. 
+    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse()) +      if (NeedSrcBitcast || NeedDestBitcast) +        return nullptr; + +    if (NeedSrcBitcast) { +      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth); +      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy); +    } +      if (ShAmt) {        // Bail out if we could end with more instructions than we started with.        if (!Ext.getVectorOperand()->hasOneUse())          return nullptr;        Scalar = Builder.CreateLShr(Scalar, ShAmt);      } + +    if (NeedDestBitcast) { +      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth); +      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy); +    }      return new TruncInst(Scalar, DestTy);    } | 

