author    Sanjay Patel <spatel@rotateright.com>    2019-06-16 15:29:03 +0000
committer Sanjay Patel <spatel@rotateright.com>    2019-06-16 15:29:03 +0000
commit    c8d88ad1a916d7e877f28bce752e8f63b73b164a (patch)
tree      a8d00159554757b13b40c0ff29bf97c71014edc8 /llvm/lib/CodeGen/CodeGenPrepare.cpp
parent    33b46a6df0b0c781eb8c6b93b0aac55680a26eff (diff)
[CodeGenPrepare][x86] shift both sides of a vector select when profitable
This is based on the example/discussion in PR37428:
https://bugs.llvm.org/show_bug.cgi?id=37428

Proper vector shift instructions don't appear until AVX2, so we may generate
several extra instructions within a loop trying to compensate for that. It's
difficult to recover from that shift expansion later than this, so use the
existing TLI hook and splat analysis to enable better codegen.

This extends CGP functionality introduced with:
rL201655

Differential Revision: https://reviews.llvm.org/D63233

llvm-svn: 363511
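For illustration, here is a hypothetical IR example (not taken from the patch
or from PR37428), assuming a target where vector shift-by-scalar is reported
as cheap. Before the transform, the shift amount is a select of two splat
constants:

    ; Hypothetical input: the shift amount is a select between two
    ; splat constants, so each arm is uniform across all lanes.
    define <4 x i32> @shl_sel(<4 x i32> %x, i1 %cond) {
      %amt = select i1 %cond, <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
                              <4 x i32> <i32 5, i32 5, i32 5, i32 5>
      %sh = shl <4 x i32> %x, %amt
      ret <4 x i32> %sh
    }

After CGP hoists the shift into both arms of the select, each shift amount is
a uniform splat:

    ; Result of the transform: two shift-by-splat operations feeding
    ; a select, instead of one variable-amount vector shift.
    define <4 x i32> @shl_sel(<4 x i32> %x, i1 %cond) {
      %t = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
      %f = shl <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
      %sel = select i1 %cond, <4 x i32> %t, <4 x i32> %f
      ret <4 x i32> %sel
    }

On pre-AVX2 x86, each shift-by-splat-constant can lower to a single immediate
shift (e.g. pslld), whereas the original variable-amount vector shift would be
expanded into several instructions.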
Diffstat (limited to 'llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r-- llvm/lib/CodeGen/CodeGenPrepare.cpp | 45
1 file changed, 42 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 56f6629f5b1..f2df4977f16 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -32,6 +32,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -363,6 +364,7 @@ class TypePromotionTransaction;
bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *Load);
+ bool optimizeShiftInst(BinaryOperator *BO);
bool optimizeSelectInst(SelectInst *SI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
bool optimizeSwitchInst(SwitchInst *SI);
@@ -5917,6 +5919,39 @@ static Value *getTrueOrFalseValue(
return V;
}
+bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
+ assert(Shift->isShift() && "Expected a shift");
+
+ // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
+ // general vector shifts, and (3) the shift amount is a select-of-splatted
+ // values, hoist the shifts before the select:
+ // shift Op0, (select Cond, TVal, FVal) -->
+ // select Cond, (shift Op0, TVal), (shift Op0, FVal)
+ //
+ // This is inverting a generic IR transform when we know that the cost of a
+ // general vector shift is more than the cost of 2 shift-by-scalars.
+ // We can't do this effectively in SDAG because we may not be able to
+ // determine if the select operands are splats from within a basic block.
+ Type *Ty = Shift->getType();
+ if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
+ return false;
+ Value *Cond, *TVal, *FVal;
+ if (!match(Shift->getOperand(1),
+ m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
+ return false;
+ if (!isSplatValue(TVal) || !isSplatValue(FVal))
+ return false;
+
+ IRBuilder<> Builder(Shift);
+ BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
+ Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
+ Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
+ Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
+ Shift->replaceAllUsesWith(NewSel);
+ Shift->eraseFromParent();
+ return true;
+}
+
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
@@ -6988,13 +7023,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
EnableAndCmpSinking && TLI)
return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
+ // TODO: Move this into the switch on opcode - it handles shifts already.
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
BinOp->getOpcode() == Instruction::LShr)) {
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
if (TLI && CI && TLI->hasExtractBitsInsn())
- return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
-
- return false;
+ if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
+ return true;
}
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
@@ -7019,6 +7054,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
return true;
switch (I->getOpcode()) {
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return optimizeShiftInst(cast<BinaryOperator>(I));
case Instruction::Call:
return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
case Instruction::Select:
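A note on the optimizeInst() changes above: before this patch, the AShr/LShr
block returned after the OptimizeExtractBits() attempt, so any such shift left
optimizeInst() immediately. Making that path fall through (per the TODO) lets
Shl/LShr/AShr reach the new switch cases, so the select-hoisting transform
also runs on shifts that the extract-bits path declines. The m_OneUse guard on
the select ensures the rewrite does not duplicate shifts while leaving the
original select alive for other users.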