X86 CodeGenPrep: sink shufflevectors before shifts

On x86, shifting a vector by a scalar is significantly cheaper than shifting a vector by another fully general vector. Unfortunately, because SelectionDAG operates on just one basic block at a time, the shufflevector instruction that reveals whether the right-hand side of a shift *is* really a scalar is often not visible to CodeGen when it's needed. This adds another handler to CodeGenPrepare, to sink any useful shufflevector instructions down to the basic block where they're used, predicated on a target hook (since on other architectures, doing so will often just introduce extra real work). rdar://problem/16063505 llvm-svn: 201655
author: Tim Northover <tnorthover@apple.com> 2014-02-19 10:02:43 +0000
committer: Tim Northover <tnorthover@apple.com> 2014-02-19 10:02:43 +0000
commit: aeb8e06d4c14ef02ddce2d8d29d14b2606545011 (patch)
tree: 138388481f585e6b2741dcce8458b1b85860f0f3 /llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
parent: ec931ecf12007cb94e53b3cc27fb5f559186e40c (diff)
download: bcm5719-llvm-aeb8e06d4c14ef02ddce2d8d29d14b2606545011.tar.gz
bcm5719-llvm-aeb8e06d4c14ef02ddce2d8d29d14b2606545011.zip
1 files changed, 72 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 0fde256943d..3c9ecce8e3e 100644
--- a/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -132,6 +132,7 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
     bool MoveExtToFormExtLoad(Instruction *I);
     bool OptimizeExtUses(Instruction *I);
     bool OptimizeSelectInst(SelectInst *SI);
+    bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
     bool DupRetToEnableTailCallOpts(BasicBlock *BB);
     bool PlaceDbgValues(Function &F);
   };
@@ -2719,6 +2720,74 @@ bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
   return true;
 }
 
+/// Return true if every defined (non-undef, i.e. not -1) mask lane of \p SVI
+/// selects the same source element. An all-undef mask also counts as a splat.
+static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
+  SmallVector<int, 16> Mask(SVI->getShuffleMask());
+  int SplatElem = -1;
+  for (unsigned i = 0; i < Mask.size(); ++i) {
+    if (Mask[i] == -1) continue; // Undef lane must not reset SplatElem.
+    if (SplatElem != -1 && Mask[i] != SplatElem) return false;
+    SplatElem = Mask[i];
+  }
+  return true;
+}
+
+/// Some targets have expensive vector shifts if the lanes aren't all the same
+/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
+/// it's often worth sinking a shufflevector splat down to its use so that
+/// codegen can spot all lanes are identical. Returns true if the IR changed.
+bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
+  BasicBlock *DefBB = SVI->getParent();
+
+  // Only do this xform if variable vector shifts are particularly expensive.
+  if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
+    return false;
+
+  // We only expect better codegen by sinking a shuffle if we can recognise a
+  // constant splat.
+  if (!isBroadcastShuffle(SVI))
+    return false;
+
+  // Snapshot the users up front: replaceUsesOfWith() below unlinks uses from
+  // SVI's use list, which would derail a live use_iterator.
+  SmallVector<Instruction *, 8> Users;
+  for (Value::use_iterator UI = SVI->use_begin(), E = SVI->use_end();
+       UI != E; ++UI)
+    Users.push_back(cast<Instruction>(*UI));
+
+  // InsertedShuffles - Only insert a shuffle in each block once.
+  DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
+
+  bool MadeChange = false;
+  for (unsigned i = 0, e = Users.size(); i != e; ++i) {
+    Instruction *User = Users[i];
+    // For now only sink into shift users that live outside the defining block.
+    BasicBlock *UserBB = User->getParent();
+    if (UserBB == DefBB || !User->isShift()) continue;
+
+    // Everything checks out, sink the shuffle if the user's block doesn't
+    // already have a copy.
+    Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
+    if (!InsertedShuffle) {
+      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+      InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0),
+                                              SVI->getOperand(1),
+                                              SVI->getOperand(2), "", InsertPt);
+    }
+    User->replaceUsesOfWith(SVI, InsertedShuffle);
+    MadeChange = true;
+  }
+
+  // If we removed all uses, nuke the shuffle.
+  if (SVI->use_empty()) {
+    SVI->eraseFromParent();
+    MadeChange = true;
+  }
+
+  return MadeChange;
+}
+
 bool CodeGenPrepare::OptimizeInst(Instruction *I) {
   if (PHINode *P = dyn_cast<PHINode>(I)) {
     // It is possible for very late stage optimizations (such as SimplifyCFG)
@@ -2791,6 +2860,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
   if (SelectInst *SI = dyn_cast<SelectInst>(I))
     return OptimizeSelectInst(SI);
 
+  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
+    return OptimizeShuffleVectorInst(SVI);
+
   return false;
 }
author	Tim Northover <tnorthover@apple.com>	2014-02-19 10:02:43 +0000
committer	Tim Northover <tnorthover@apple.com>	2014-02-19 10:02:43 +0000
commit	aeb8e06d4c14ef02ddce2d8d29d14b2606545011 (patch)
tree	138388481f585e6b2741dcce8458b1b85860f0f3 /llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
parent	ec931ecf12007cb94e53b3cc27fb5f559186e40c (diff)
download	bcm5719-llvm-aeb8e06d4c14ef02ddce2d8d29d14b2606545011.tar.gz bcm5719-llvm-aeb8e06d4c14ef02ddce2d8d29d14b2606545011.zip