diff options
author | Sanjay Patel <spatel@rotateright.com> | 2018-10-01 14:40:00 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2018-10-01 14:40:00 +0000 |
commit | 31b07198f190f3c8a1724b5451327b0f0451c9d3 (patch) | |
tree | 3fab70c7c5f853227e7d671138061d610a73e727 /llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | |
parent | 1743ebe369f94289d418dd67b18aec26f2ef10b2 (diff) | |
download | bcm5719-llvm-31b07198f190f3c8a1724b5451327b0f0451c9d3.tar.gz bcm5719-llvm-31b07198f190f3c8a1724b5451327b0f0451c9d3.zip |
[InstCombine] try to convert vector insert+extract to trunc; 2nd try
This was originally committed at rL343407, but reverted at
rL343458 because it crashed when the destination type of the
extract was floating-point (FP). This version of the patch adds
a check that the destination scalar type is an integer, so the
fold is skipped for FP destinations. Tests added at rL343480.
Original commit message:
This transform is requested for the backend in:
https://bugs.llvm.org/show_bug.cgi?id=39016
...but I figured it was worth doing in IR too, and it's probably
easier to implement here, so that's this patch.
In the simplest case, we are just truncating a scalar value. If the
extract index doesn't correspond to the LSBs of the scalar, then we
have to shift-right before the truncate. Endian-ness makes this tricky,
but hopefully the ASCII-art helps visualize the transform.
Differential Revision: https://reviews.llvm.org/D52439
llvm-svn: 343482
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 48 |
1 file changed, 46 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index c391034dc00..945664de686 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -167,7 +167,8 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
 }
 
 static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
-                                      InstCombiner::BuilderTy &Builder) {
+                                      InstCombiner::BuilderTy &Builder,
+                                      bool IsBigEndian) {
   Value *X;
   uint64_t ExtIndexC;
   if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
@@ -186,6 +187,49 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
     if (Value *Elt = findScalarElement(X, ExtIndexC))
       return new BitCastInst(Elt, DestTy);
 
+  // If the source elements are wider than the destination, try to shift and
+  // truncate a subset of scalar bits of an insert op.
+  // TODO: This is limited to integer types, but we could bitcast to/from FP.
+  if (NumSrcElts < NumElts && SrcTy->getScalarType()->isIntegerTy() &&
+      DestTy->getScalarType()->isIntegerTy()) {
+    Value *Scalar;
+    uint64_t InsIndexC;
+    if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
+                                  m_ConstantInt(InsIndexC))))
+      return nullptr;
+
+    // The extract must be from the subset of vector elements that we inserted
+    // into. Example: if we inserted element 1 of a <2 x i64> and we are
+    // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
+    // of elements 4-7 of the bitcasted vector.
+    unsigned NarrowingRatio = NumElts / NumSrcElts;
+    if (ExtIndexC / NarrowingRatio != InsIndexC)
+      return nullptr;
+
+    // We are extracting part of the original scalar. How that scalar is
+    // inserted into the vector depends on the endian-ness. Example:
+    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
+    //                                       +--+--+--+--+--+--+--+--+
+    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
+    // extelt <4 x i16> V', 3:               |                 |S2|S3|
+    //                                       +--+--+--+--+--+--+--+--+
+    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
+    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
+    // In this example, we must right-shift little-endian. Big-endian is just a
+    // truncate.
+    unsigned Chunk = ExtIndexC % NarrowingRatio;
+    if (IsBigEndian)
+      Chunk = NarrowingRatio - 1 - Chunk;
+    unsigned ShAmt = Chunk * DestTy->getPrimitiveSizeInBits();
+    if (ShAmt) {
+      // Bail out if we could end with more instructions than we started with.
+      if (!Ext.getVectorOperand()->hasOneUse())
+        return nullptr;
+      Scalar = Builder.CreateLShr(Scalar, ShAmt);
+    }
+    return new TruncInst(Scalar, DestTy);
+  }
+
   return nullptr;
 }
 
@@ -224,7 +268,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
     }
   }
 
-  if (Instruction *I = foldBitcastExtElt(EI, Builder))
+  if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
     return I;
 
   // If there's a vector PHI feeding a scalar use through this extractelement