[InstCombine] try to convert vector insert+extract to trunc; 2nd try

This was originally committed at rL343407, but reverted at rL343458 because it crashed while trying to handle a case where the destination type is FP. This version of the patch adds a check for that possibility. Tests added at rL343480. Original commit message: This transform is requested for the backend in: https://bugs.llvm.org/show_bug.cgi?id=39016 ...but I figured it was worth doing in IR too, and it's probably easier to implement here, so that's this patch. In the simplest case, we are just truncating a scalar value. If the extract index doesn't correspond to the LSBs of the scalar, then we have to shift-right before the truncate. Endianness makes this tricky, but hopefully the ASCII art in the code comment helps visualize the transform. Differential Revision: https://reviews.llvm.org/D52439 llvm-svn: 343482
author: Sanjay Patel <spatel@rotateright.com> 2018-10-01 14:40:00 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2018-10-01 14:40:00 +0000
commit: 31b07198f190f3c8a1724b5451327b0f0451c9d3 (patch)
tree: 3fab70c7c5f853227e7d671138061d610a73e727 /llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
parent: 1743ebe369f94289d418dd67b18aec26f2ef10b2 (diff)
download: bcm5719-llvm-31b07198f190f3c8a1724b5451327b0f0451c9d3.tar.gz
bcm5719-llvm-31b07198f190f3c8a1724b5451327b0f0451c9d3.zip
1 files changed, 46 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index c391034dc00..945664de686 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -167,7 +167,8 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
 }
 
 static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
-                                      InstCombiner::BuilderTy &Builder) {
+                                      InstCombiner::BuilderTy &Builder,
+                                      bool IsBigEndian) {
   Value *X;
   uint64_t ExtIndexC;
   if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
@@ -186,6 +187,49 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
     if (Value *Elt = findScalarElement(X, ExtIndexC))
       return new BitCastInst(Elt, DestTy);
 
+  // If the source elements are wider than the destination, try to shift and
+  // truncate a subset of scalar bits of an insert op.
+  // TODO: This is limited to integer types, but we could bitcast to/from FP.
+  if (NumSrcElts < NumElts && SrcTy->getScalarType()->isIntegerTy() &&
+      DestTy->getScalarType()->isIntegerTy()) {
+    Value *Scalar;
+    uint64_t InsIndexC;
+    if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
+                                  m_ConstantInt(InsIndexC))))
+      return nullptr;
+
+    // The extract must be from the subset of vector elements that we inserted
+    // into. Example: if we inserted element 1 of a <2 x i64> and we are
+    // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
+    // of elements 4-7 of the bitcasted vector.
+    unsigned NarrowingRatio = NumElts / NumSrcElts;
+    if (ExtIndexC / NarrowingRatio != InsIndexC)
+      return nullptr;
+
+    // We are extracting part of the original scalar. How that scalar is
+    // inserted into the vector depends on the endian-ness. Example:
+    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
+    //                                       +--+--+--+--+--+--+--+--+
+    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
+    // extelt <4 x i16> V', 3:               |                 |S2|S3|
+    //                                       +--+--+--+--+--+--+--+--+
+    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
+    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
+    // In this example, we must right-shift little-endian. Big-endian is just a
+    // truncate.
+    unsigned Chunk = ExtIndexC % NarrowingRatio;
+    if (IsBigEndian)
+      Chunk = NarrowingRatio - 1 - Chunk;
+    unsigned ShAmt = Chunk * DestTy->getPrimitiveSizeInBits();
+    if (ShAmt) {
+      // Bail out if we could end with more instructions than we started with.
+      if (!Ext.getVectorOperand()->hasOneUse())
+        return nullptr;
+      Scalar = Builder.CreateLShr(Scalar, ShAmt);
+    }
+    return new TruncInst(Scalar, DestTy);
+  }
+
   return nullptr;
 }
 
@@ -224,7 +268,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
       }
     }
 
-    if (Instruction *I = foldBitcastExtElt(EI, Builder))
+    if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
       return I;
 
     // If there's a vector PHI feeding a scalar use through this extractelement
author	Sanjay Patel <spatel@rotateright.com>	2018-10-01 14:40:00 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2018-10-01 14:40:00 +0000
commit	31b07198f190f3c8a1724b5451327b0f0451c9d3 (patch)
tree	3fab70c7c5f853227e7d671138061d610a73e727 /llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
parent	1743ebe369f94289d418dd67b18aec26f2ef10b2 (diff)
download	bcm5719-llvm-31b07198f190f3c8a1724b5451327b0f0451c9d3.tar.gz bcm5719-llvm-31b07198f190f3c8a1724b5451327b0f0451c9d3.zip