author    Sanjay Patel <spatel@rotateright.com>    2019-06-16 15:29:03 +0000
committer Sanjay Patel <spatel@rotateright.com>    2019-06-16 15:29:03 +0000
commit    c8d88ad1a916d7e877f28bce752e8f63b73b164a (patch)
tree      a8d00159554757b13b40c0ff29bf97c71014edc8 /llvm/lib/CodeGen/CodeGenPrepare.cpp
parent    33b46a6df0b0c781eb8c6b93b0aac55680a26eff (diff)
[CodeGenPrepare][x86] shift both sides of a vector select when profitable
This is based on the example/discussion in PR37428:
https://bugs.llvm.org/show_bug.cgi?id=37428

Proper vector shift instructions don't appear until AVX2, so we may generate
several extra instructions within a loop trying to compensate for that. It's
difficult to recover from that shift expansion later than this, so use the
existing TLI hook and splat analysis to enable better codegen.

This extends CGP functionality introduced with:
rL201655

Differential Revision: https://reviews.llvm.org/D63233

llvm-svn: 363511
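For illustration, here is a hypothetical IR example (not taken from the patch
or from PR37428), assuming a target where vector shift-by-scalar is reported
as cheap. Before the transform, the shift amount is a select of two splat
constants:

    ; Hypothetical input: the shift amount is a select between two
    ; splat constants, so each arm is uniform across all lanes.
    define <4 x i32> @shl_sel(<4 x i32> %x, i1 %cond) {
      %amt = select i1 %cond, <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
                              <4 x i32> <i32 5, i32 5, i32 5, i32 5>
      %sh = shl <4 x i32> %x, %amt
      ret <4 x i32> %sh
    }

After CGP hoists the shift into both arms of the select, each shift amount is
a uniform splat:

    ; Result of the transform: two shift-by-splat operations feeding
    ; a select, instead of one variable-amount vector shift.
    define <4 x i32> @shl_sel(<4 x i32> %x, i1 %cond) {
      %t = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
      %f = shl <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
      %sel = select i1 %cond, <4 x i32> %t, <4 x i32> %f
      ret <4 x i32> %sel
    }

On pre-AVX2 x86, each shift-by-splat-constant can lower to a single immediate
shift (e.g. pslld), whereas the original variable-amount vector shift would be
expanded into several instructions.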
Diffstat (limited to 'llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r-- llvm/lib/CodeGen/CodeGenPrepare.cpp | 45
1 file changed, 42 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 56f6629f5b1..f2df4977f16 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -32,6 +32,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -363,6 +364,7 @@ class TypePromotionTransaction;
bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *Load);
+ bool optimizeShiftInst(BinaryOperator *BO);
bool optimizeSelectInst(SelectInst *SI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
bool optimizeSwitchInst(SwitchInst *SI);
@@ -5917,6 +5919,39 @@ static Value *getTrueOrFalseValue(
return V;
}
+bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
+ assert(Shift->isShift() && "Expected a shift");
+
+ // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
+ // general vector shifts, and (3) the shift amount is a select-of-splatted
+ // values, hoist the shifts before the select:
+ // shift Op0, (select Cond, TVal, FVal) -->
+ // select Cond, (shift Op0, TVal), (shift Op0, FVal)
+ //
+ // This is inverting a generic IR transform when we know that the cost of a
+ // general vector shift is more than the cost of 2 shift-by-scalars.
+ // We can't do this effectively in SDAG because we may not be able to
+ // determine if the select operands are splats from within a basic block.
+ Type *Ty = Shift->getType();
+ if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
+ return false;
+ Value *Cond, *TVal, *FVal;
+ if (!match(Shift->getOperand(1),
+ m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
+ return false;
+ if (!isSplatValue(TVal) || !isSplatValue(FVal))
+ return false;
+
+ IRBuilder<> Builder(Shift);
+ BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
+ Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
+ Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
+ Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
+ Shift->replaceAllUsesWith(NewSel);
+ Shift->eraseFromParent();
+ return true;
+}
+
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
@@ -6988,13 +7023,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
EnableAndCmpSinking && TLI)
return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
+ // TODO: Move this into the switch on opcode - it handles shifts already.
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
BinOp->getOpcode() == Instruction::LShr)) {
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
if (TLI && CI && TLI->hasExtractBitsInsn())
- return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
-
- return false;
+ if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
+ return true;
}
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
@@ -7019,6 +7054,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
return true;
switch (I->getOpcode()) {
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return optimizeShiftInst(cast<BinaryOperator>(I));
case Instruction::Call:
return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
case Instruction::Select:
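A note on the optimizeInst() changes above: before this patch, the AShr/LShr
block returned after the OptimizeExtractBits() attempt, so any such shift left
optimizeInst() immediately. Making that path fall through (per the TODO) lets
Shl/LShr/AShr reach the new switch cases, so the select-hoisting transform
also runs on shifts that the extract-bits path declines. The m_OneUse guard on
the select ensures the rewrite does not duplicate shifts while leaving the
original select alive for other users.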