summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/InitializePasses.h1
-rw-r--r--llvm/include/llvm/Transforms/Scalar.h7
-rw-r--r--llvm/include/llvm/Transforms/Scalar/GVN.h7
-rw-r--r--llvm/include/llvm/Transforms/Utils/Local.h8
-rw-r--r--llvm/lib/Transforms/IPO/PassManagerBuilder.cpp9
-rw-r--r--llvm/lib/Transforms/Scalar/CMakeLists.txt1
-rw-r--r--llvm/lib/Transforms/Scalar/GVNSink.cpp870
-rw-r--r--llvm/lib/Transforms/Scalar/Scalar.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/Local.cpp45
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp47
-rw-r--r--llvm/test/Transforms/GVNSink/dither.ll42
-rw-r--r--llvm/test/Transforms/GVNSink/indirect-call.ll70
-rw-r--r--llvm/test/Transforms/GVNSink/sink-common-code.ll694
-rw-r--r--llvm/test/Transforms/GVNSink/struct.ll71
14 files changed, 1825 insertions, 48 deletions
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 3df5244a0bd..a39a777749b 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -144,6 +144,7 @@ void initializeGCMachineCodeAnalysisPass(PassRegistry&);
void initializeGCModuleInfoPass(PassRegistry&);
void initializeGCOVProfilerLegacyPassPass(PassRegistry&);
void initializeGVNHoistLegacyPassPass(PassRegistry&);
+void initializeGVNSinkLegacyPassPass(PassRegistry&);
void initializeGVNLegacyPassPass(PassRegistry&);
void initializeGlobalDCELegacyPassPass(PassRegistry&);
void initializeGlobalMergePass(PassRegistry&);
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index ba0a3ee1287..856c288a071 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -356,6 +356,13 @@ FunctionPass *createGVNHoistPass();
//===----------------------------------------------------------------------===//
//
+// GVNSink - This pass uses an "inverted" value numbering to decide the
+// similarity of expressions and sinks similar expressions into successors.
+//
+FunctionPass *createGVNSinkPass();
+
+//===----------------------------------------------------------------------===//
+//
// MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads
// are hoisted into the header, while stores sink into the footer.
//
diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h
index 8f05e8cdb23..589aaaca02f 100644
--- a/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -238,7 +238,12 @@ struct GVNHoistPass : PassInfoMixin<GVNHoistPass> {
/// \brief Run the pass over the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
-
+/// \brief Uses an "inverted" value numbering to decide the similarity of
+/// expressions and sinks similar expressions into successors.
+struct GVNSinkPass : PassInfoMixin<GVNSinkPass> {
+ /// \brief Run the pass over the function.
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
}
#endif
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index 65fffc530b3..8942111307f 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -410,6 +410,14 @@ bool recognizeBSwapOrBitReverseIdiom(
void maybeMarkSanitizerLibraryCallNoBuiltin(CallInst *CI,
const TargetLibraryInfo *TLI);
+//===----------------------------------------------------------------------===//
+// Transform predicates
+//
+
+/// Given an instruction, is it legal to set operand OpIdx to a non-constant
+/// value?
+bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx);
+
} // End llvm namespace
#endif
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index ec06d5f9fb0..054df86b094 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -155,6 +155,10 @@ static cl::opt<bool>
cl::Hidden,
cl::desc("Enable the simple loop unswitch pass."));
+static cl::opt<bool> EnableGVNSink(
+ "enable-gvn-sink", cl::init(false), cl::Hidden,
+ cl::desc("Enable the GVN sinking pass (default = on)"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -307,6 +311,11 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
if (EnableGVNHoist)
MPM.add(createGVNHoistPass());
+ if (EnableGVNSink) {
+ MPM.add(createGVNSinkPass());
+ MPM.add(createCFGSimplificationPass());
+ }
+
// Speculative execution if the target has divergent branches; otherwise nop.
MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
MPM.add(createJumpThreadingPass()); // Thread jumps.
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 52339075876..f5196cc4618 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -13,6 +13,7 @@ add_llvm_library(LLVMScalarOpts
GuardWidening.cpp
GVN.cpp
GVNHoist.cpp
+ GVNSink.cpp
IVUsersPrinter.cpp
InductiveRangeCheckElimination.cpp
IndVarSimplify.cpp
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
new file mode 100644
index 00000000000..cd5b54b35ce
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -0,0 +1,870 @@
+//===- GVNSink.cpp - sink expressions into successors -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file GVNSink.cpp
+/// This pass attempts to sink instructions into successors, reducing static
+/// instruction count and enabling if-conversion.
+///
+/// We use a variant of global value numbering to decide what can be sunk.
+/// Consider:
+///
+/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ]
+/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ]
+/// \ /
+/// [ %e = phi i32 %a2, %c2 ]
+/// [ add i32 %e, 4 ]
+///
+///
+/// GVN would number %a1 and %c1 differently because they compute different
+/// results - the VN of an instruction is a function of its opcode and the
+/// transitive closure of its operands. This is the key property for hoisting
+/// and CSE.
+///
+/// What we want when sinking however is for a numbering that is a function of
+/// the *uses* of an instruction, which allows us to answer the question "if I
+/// replace %a1 with %c1, will it contribute in an equivalent way to all
+/// successive instructions?". The PostValueTable class in GVN provides this
+/// mapping.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/GVNExpression.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <unordered_set>
+using namespace llvm;
+
+#define DEBUG_TYPE "gvn-sink"
+
+STATISTIC(NumRemoved, "Number of instructions removed");
+
+namespace {
+
+static bool isMemoryInst(const Instruction *I) {
+ return isa<LoadInst>(I) || isa<StoreInst>(I) ||
+ (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
+ (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
+}
+
+/// Iterates through instructions in a set of blocks in reverse order from the
+/// first non-terminator. For example (assume all blocks have size n):
+/// LockstepReverseIterator I([B1, B2, B3]);
+/// *I-- = [B1[n], B2[n], B3[n]];
+/// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
+/// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
+/// ...
+///
+/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
+/// to
+/// determine which blocks are still going and the order they appear in the
+/// list returned by operator*.
+class LockstepReverseIterator {
+ ArrayRef<BasicBlock *> Blocks;
+ SmallPtrSet<BasicBlock *, 4> ActiveBlocks;
+ SmallVector<Instruction *, 4> Insts;
+ bool Fail;
+
+public:
+ LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
+ reset();
+ }
+
+ void reset() {
+ Fail = false;
+ ActiveBlocks.clear();
+ for (BasicBlock *BB : Blocks)
+ ActiveBlocks.insert(BB);
+ Insts.clear();
+ for (BasicBlock *BB : Blocks) {
+ if (BB->size() <= 1) {
+ // Block wasn't big enough - only contained a terminator.
+ ActiveBlocks.erase(BB);
+ continue;
+ }
+ Insts.push_back(BB->getTerminator()->getPrevNode());
+ }
+ if (Insts.empty())
+ Fail = true;
+ }
+
+ bool isValid() const { return !Fail; }
+ ArrayRef<Instruction *> operator*() const { return Insts; }
+ SmallPtrSet<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
+
+ void restrictToBlocks(SmallPtrSetImpl<BasicBlock *> &Blocks) {
+ for (auto II = Insts.begin(); II != Insts.end();) {
+ if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
+ Blocks.end()) {
+ ActiveBlocks.erase((*II)->getParent());
+ II = Insts.erase(II);
+ } else {
+ ++II;
+ }
+ }
+ }
+
+ void operator--() {
+ if (Fail)
+ return;
+ SmallVector<Instruction *, 4> NewInsts;
+ for (auto *Inst : Insts) {
+ if (Inst == &Inst->getParent()->front())
+ ActiveBlocks.erase(Inst->getParent());
+ else
+ NewInsts.push_back(Inst->getPrevNode());
+ }
+ if (NewInsts.empty()) {
+ Fail = true;
+ return;
+ }
+ Insts = NewInsts;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+
+/// Candidate solution for sinking. There may be different ways to
+/// sink instructions, differing in the number of instructions sunk,
+/// the number of predecessors sunk from and the number of PHIs
+/// required.
+struct SinkingInstructionCandidate {
+ unsigned NumBlocks;
+ unsigned NumInstructions;
+ unsigned NumPHIs;
+ unsigned NumMemoryInsts;
+ int Cost = -1;
+ SmallVector<BasicBlock *, 4> Blocks;
+
+ void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
+ unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
+ unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
+ Cost = (NumInstructions * (NumBlocks - 1)) -
+ (NumExtraPHIs *
+ NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
+ - SplitEdgeCost;
+ }
+ bool operator>=(const SinkingInstructionCandidate &Other) const {
+ return Cost >= Other.Cost;
+ }
+};
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const SinkingInstructionCandidate &C) {
+ OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
+ << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
+ return OS;
+}
+
+//===----------------------------------------------------------------------===//
+
+/// Describes a PHI node that may or may not exist. These track the PHIs
+/// that must be created if we sunk a sequence of instructions. It provides
+/// a hash function for efficient equality comparisons.
+class ModelledPHI {
+ SmallVector<Value *, 4> Values;
+ SmallVector<BasicBlock *, 4> Blocks;
+
+public:
+ ModelledPHI() {}
+ ModelledPHI(const PHINode *PN) {
+ for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
+ Blocks.push_back(PN->getIncomingBlock(I));
+ std::sort(Blocks.begin(), Blocks.end());
+
+ // This assumes the PHI is already well-formed and there aren't conflicting
+ // incoming values for the same block.
+ for (auto *B : Blocks)
+ Values.push_back(PN->getIncomingValueForBlock(B));
+ }
+ /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
+ /// without the same ID.
+ /// \note This is specifically for DenseMapInfo - do not use this!
+ static ModelledPHI createDummy(unsigned ID) {
+ ModelledPHI M;
+ M.Values.push_back(reinterpret_cast<Value*>(ID));
+ return M;
+ }
+
+ /// Create a PHI from an array of incoming values and incoming blocks.
+ template <typename VArray, typename BArray>
+ ModelledPHI(const VArray &V, const BArray &B) {
+ std::copy(V.begin(), V.end(), std::back_inserter(Values));
+ std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
+ }
+
+ /// Create a PHI from [I[OpNum] for I in Insts].
+ template <typename BArray>
+ ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
+ std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
+ for (auto *I : Insts)
+ Values.push_back(I->getOperand(OpNum));
+ }
+
+ /// Restrict the PHI's contents down to only \c NewBlocks.
+ /// \c NewBlocks must be a subset of \c this->Blocks.
+ void restrictToBlocks(const SmallPtrSetImpl<BasicBlock *> &NewBlocks) {
+ auto BI = Blocks.begin();
+ auto VI = Values.begin();
+ while (BI != Blocks.end()) {
+ assert(VI != Values.end());
+ if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
+ NewBlocks.end()) {
+ BI = Blocks.erase(BI);
+ VI = Values.erase(VI);
+ } else {
+ ++BI;
+ ++VI;
+ }
+ }
+ assert(Blocks.size() == NewBlocks.size());
+ }
+
+ ArrayRef<Value *> getValues() const { return Values; }
+
+ bool areAllIncomingValuesSame() const {
+ return all_of(Values, [&](Value *V) { return V == Values[0]; });
+ }
+ bool areAllIncomingValuesSameType() const {
+ return all_of(
+ Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
+ }
+ bool areAnyIncomingValuesConstant() const {
+ return any_of(Values, [&](Value *V) { return isa<Constant>(V); });
+ }
+ // Hash functor
+ unsigned hash() const {
+ return (unsigned)hash_combine_range(Values.begin(), Values.end());
+ }
+ bool operator==(const ModelledPHI &Other) const {
+ return Values == Other.Values && Blocks == Other.Blocks;
+ }
+};
+
+template <typename ModelledPHI> struct DenseMapInfo {
+ static inline ModelledPHI &getEmptyKey() {
+ static ModelledPHI Dummy = ModelledPHI::createDummy(0);
+ return Dummy;
+ }
+ static inline ModelledPHI &getTombstoneKey() {
+ static ModelledPHI Dummy = ModelledPHI::createDummy(1);
+ return Dummy;
+ }
+ static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
+ static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
+ return LHS == RHS;
+ }
+};
+
+typedef DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>> ModelledPHISet;
+
+//===----------------------------------------------------------------------===//
+// ValueTable
+//===----------------------------------------------------------------------===//
+// This is a value number table where the value number is a function of the
+// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
+// that the program would be equivalent if we replaced A with PHI(A, B).
+//===----------------------------------------------------------------------===//
+
+/// A GVN expression describing how an instruction is used. The operands
+/// field of BasicExpression is used to store uses, not operands.
+///
+/// This class also contains fields for discriminators used when determining
+/// equivalence of instructions with sideeffects.
+class InstructionUseExpr : public GVNExpression::BasicExpression {
+ unsigned MemoryUseOrder = -1;
+ bool Volatile = false;
+
+public:
+ InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
+ BumpPtrAllocator &A)
+ : GVNExpression::BasicExpression(I->getNumUses()) {
+ allocateOperands(R, A);
+ setOpcode(I->getOpcode());
+ setType(I->getType());
+
+ for (auto &U : I->uses())
+ op_push_back(U.getUser());
+ std::sort(op_begin(), op_end());
+ }
+ void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
+ void setVolatile(bool V) { Volatile = V; }
+
+ virtual hash_code getHashValue() const {
+ return hash_combine(GVNExpression::BasicExpression::getHashValue(),
+ MemoryUseOrder, Volatile);
+ }
+
+ template <typename Function> hash_code getHashValue(Function MapFn) {
+ hash_code H =
+ hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile);
+ for (auto *V : operands())
+ H = hash_combine(H, MapFn(V));
+ return H;
+ }
+};
+
+class ValueTable {
+ DenseMap<Value *, uint32_t> ValueNumbering;
+ DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
+ DenseMap<size_t, uint32_t> HashNumbering;
+ BumpPtrAllocator Allocator;
+ ArrayRecycler<Value *> Recycler;
+ uint32_t nextValueNumber;
+
+ /// Create an expression for I based on its opcode and its uses. If I
+ /// touches or reads memory, the expression is also based upon its memory
+ /// order - see \c getMemoryUseOrder().
+ InstructionUseExpr *createExpr(Instruction *I) {
+ InstructionUseExpr *E =
+ new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
+ if (isMemoryInst(I))
+ E->setMemoryUseOrder(getMemoryUseOrder(I));
+
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) {
+ CmpInst::Predicate Predicate = C->getPredicate();
+ E->setOpcode((C->getOpcode() << 8) | Predicate);
+ }
+ return E;
+ }
+
+ /// Helper to compute the value number for a memory instruction
+ /// (LoadInst/StoreInst), including checking the memory ordering and
+ /// volatility.
+ template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
+ if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
+ return nullptr;
+ InstructionUseExpr *E = createExpr(I);
+ E->setVolatile(I->isVolatile());
+ return E;
+ }
+
+public:
+ /// Returns the value number for the specified value, assigning
+ /// it a new number if it did not have one before.
+ uint32_t lookupOrAdd(Value *V) {
+ auto VI = ValueNumbering.find(V);
+ if (VI != ValueNumbering.end())
+ return VI->second;
+
+ if (!isa<Instruction>(V)) {
+ ValueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ Instruction *I = cast<Instruction>(V);
+ InstructionUseExpr *exp = nullptr;
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ exp = createMemoryExpr(cast<LoadInst>(I));
+ break;
+ case Instruction::Store:
+ exp = createMemoryExpr(cast<StoreInst>(I));
+ break;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ case Instruction::Select:
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement:
+ case Instruction::ShuffleVector:
+ case Instruction::InsertValue:
+ case Instruction::GetElementPtr:
+ exp = createExpr(I);
+ break;
+ default:
+ break;
+ }
+
+ if (!exp) {
+ ValueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ uint32_t e = ExpressionNumbering[exp];
+ if (!e) {
+ hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
+ auto I = HashNumbering.find(H);
+ if (I != HashNumbering.end()) {
+ e = I->second;
+ } else {
+ e = nextValueNumber++;
+ HashNumbering[H] = e;
+ ExpressionNumbering[exp] = e;
+ }
+ }
+ ValueNumbering[V] = e;
+ return e;
+ }
+
+ /// Returns the value number of the specified value. Fails if the value has
+ /// not yet been numbered.
+ uint32_t lookup(Value *V) const {
+ auto VI = ValueNumbering.find(V);
+ assert(VI != ValueNumbering.end() && "Value not numbered?");
+ return VI->second;
+ }
+
+ /// Removes all value numberings and resets the value table.
+ void clear() {
+ ValueNumbering.clear();
+ ExpressionNumbering.clear();
+ HashNumbering.clear();
+ Recycler.clear(Allocator);
+ nextValueNumber = 1;
+ }
+
+ ValueTable() : nextValueNumber(1) {}
+
+ /// \c Inst uses or touches memory. Return an ID describing the memory state
+ /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
+ /// the exact same memory operations happen after I1 and I2.
+ ///
+ /// This is a very hard problem in general, so we use domain-specific
+ /// knowledge that we only ever check for equivalence between blocks sharing a
+ /// single immediate successor that is common, and when determining if I1 ==
+ /// I2 we will have already determined that next(I1) == next(I2). This
+ /// inductive property allows us to simply return the value number of the next
+ /// instruction that defines memory.
+ uint32_t getMemoryUseOrder(Instruction *Inst) {
+ auto *BB = Inst->getParent();
+ for (auto I = std::next(Inst->getIterator()), E = BB->end();
+ I != E && !I->isTerminator(); ++I) {
+ if (!isMemoryInst(&*I))
+ continue;
+ if (isa<LoadInst>(&*I))
+ continue;
+ CallInst *CI = dyn_cast<CallInst>(&*I);
+ if (CI && CI->onlyReadsMemory())
+ continue;
+ InvokeInst *II = dyn_cast<InvokeInst>(&*I);
+ if (II && II->onlyReadsMemory())
+ continue;
+ return lookupOrAdd(&*I);
+ }
+ return 0;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+
+class GVNSink {
+public:
+ GVNSink() : VN() {}
+ bool run(Function &F) {
+ DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n");
+
+ unsigned NumSunk = 0;
+ ReversePostOrderTraversal<Function*> RPOT(&F);
+ for (auto *N : RPOT)
+ NumSunk += sinkBB(N);
+
+ return NumSunk > 0;
+ }
+
+private:
+ ValueTable VN;
+
+ bool isInstructionBlacklisted(Instruction *I) {
+ // These instructions may change or break semantics if moved.
+ if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
+ I->getType()->isTokenTy())
+ return true;
+ return false;
+ }
+
+ /// The main heuristic function. Analyze the set of instructions pointed to by
+ /// LRI and return a candidate solution if these instructions can be sunk, or
+ /// None otherwise.
+ Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
+ LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
+ ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
+
+ /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
+ void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
+ SmallPtrSetImpl<Value *> &PHIContents) {
+ for (auto &I : *BB) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ return;
+
+ auto MPHI = ModelledPHI(PN);
+ PHIs.insert(MPHI);
+ for (auto *V : MPHI.getValues())
+ PHIContents.insert(V);
+ }
+ }
+
+ /// The main instruction sinking driver. Set up state and try and sink
+ /// instructions into BBEnd from its predecessors.
+ unsigned sinkBB(BasicBlock *BBEnd);
+
+ /// Perform the actual mechanics of sinking an instruction from Blocks into
+ /// BBEnd, which is their only successor.
+ void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);
+
+ /// Remove PHIs that all have the same incoming value.
+ void foldPointlessPHINodes(BasicBlock *BB) {
+ auto I = BB->begin();
+ while (PHINode *PN = dyn_cast<PHINode>(I++)) {
+ if (!all_of(PN->incoming_values(),
+ [&](const Value *V) { return V == PN->getIncomingValue(0); }))
+ continue;
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->eraseFromParent();
+ }
+ }
+};
+
+Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
+ LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
+ ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
+ auto Insts = *LRI;
+ DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
+ : Insts) {
+ I->dump();
+ } dbgs() << " ]\n";);
+
+ DenseMap<uint32_t, unsigned> VNums;
+ for (auto *I : Insts) {
+ uint32_t N = VN.lookupOrAdd(I);
+ DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n");
+ if (N == ~0U)
+ return None;
+ VNums[N]++;
+ }
+ unsigned VNumToSink =
+ std::max_element(VNums.begin(), VNums.end(),
+ [](const std::pair<uint32_t, unsigned> &I,
+ const std::pair<uint32_t, unsigned> &J) {
+ return I.second < J.second;
+ })
+ ->first;
+
+ if (VNums[VNumToSink] == 1)
+ // Can't sink anything!
+ return None;
+
+ // Now restrict the number of incoming blocks down to only those with
+ // VNumToSink.
+ auto &ActivePreds = LRI.getActiveBlocks();
+ unsigned InitialActivePredSize = ActivePreds.size();
+ SmallVector<Instruction *, 4> NewInsts;
+ for (auto *I : Insts) {
+ if (VN.lookup(I) != VNumToSink)
+ ActivePreds.erase(I->getParent());
+ else
+ NewInsts.push_back(I);
+ }
+ for (auto *I : NewInsts)
+ if (isInstructionBlacklisted(I))
+ return None;
+
+ // If we've restricted the incoming blocks, restrict all needed PHIs also
+ // to that set.
+ bool RecomputePHIContents = false;
+ if (ActivePreds.size() != InitialActivePredSize) {
+ ModelledPHISet NewNeededPHIs;
+ for (auto P : NeededPHIs) {
+ P.restrictToBlocks(ActivePreds);
+ NewNeededPHIs.insert(P);
+ }
+ NeededPHIs = NewNeededPHIs;
+ LRI.restrictToBlocks(ActivePreds);
+ RecomputePHIContents = true;
+ }
+
+ // The sunk instruction's results.
+ ModelledPHI NewPHI(NewInsts, ActivePreds);
+
+ // Does sinking this instruction render previous PHIs redundant?
+ if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
+ NeededPHIs.erase(NewPHI);
+ RecomputePHIContents = true;
+ }
+
+ if (RecomputePHIContents) {
+ // The needed PHIs have changed, so recompute the set of all needed
+ // values.
+ PHIContents.clear();
+ for (auto &PHI : NeededPHIs)
+ PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
+ }
+
+ // Is this instruction required by a later PHI that doesn't match this PHI?
+ // if so, we can't sink this instruction.
+ for (auto *V : NewPHI.getValues())
+ if (PHIContents.count(V))
+ // V exists in this PHI, but the whole PHI is different to NewPHI
+ // (else it would have been removed earlier). We cannot continue
+ // because this isn't representable.
+ return None;
+
+ // Which operands need PHIs?
+ // FIXME: If any of these fail, we should partition up the candidates to
+ // try and continue making progress.
+ Instruction *I0 = NewInsts[0];
+ for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
+ ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
+ if (PHI.areAllIncomingValuesSame())
+ continue;
+ if (!canReplaceOperandWithVariable(I0, OpNum))
+ // We can 't create a PHI from this instruction!
+ return None;
+ if (NeededPHIs.count(PHI))
+ continue;
+ if (!PHI.areAllIncomingValuesSameType())
+ return None;
+ // Don't create indirect calls! The called value is the final operand.
+ if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
+ PHI.areAnyIncomingValuesConstant())
+ return None;
+
+ NeededPHIs.reserve(NeededPHIs.size());
+ NeededPHIs.insert(PHI);
+ PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
+ }
+
+ if (isMemoryInst(NewInsts[0]))
+ ++MemoryInstNum;
+
+ SinkingInstructionCandidate Cand;
+ Cand.NumInstructions = ++InstNum;
+ Cand.NumMemoryInsts = MemoryInstNum;
+ Cand.NumBlocks = ActivePreds.size();
+ Cand.NumPHIs = NeededPHIs.size();
+ for (auto *C : ActivePreds)
+ Cand.Blocks.push_back(C);
+
+ return Cand;
+}
+
+unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
+ DEBUG(dbgs() << "GVNSink: running on basic block ";
+ BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
+ SmallVector<BasicBlock *, 4> Preds;
+ for (auto *B : predecessors(BBEnd)) {
+ auto *T = B->getTerminator();
+ if (isa<BranchInst>(T) || isa<SwitchInst>(T))
+ Preds.push_back(B);
+ else
+ return 0;
+ }
+ if (Preds.size() < 2)
+ return 0;
+ std::sort(Preds.begin(), Preds.end());
+
+ unsigned NumOrigPreds = Preds.size();
+ // We can only sink instructions through unconditional branches.
+ for (auto I = Preds.begin(); I != Preds.end();) {
+ if ((*I)->getTerminator()->getNumSuccessors() != 1)
+ I = Preds.erase(I);
+ else
+ ++I;
+ }
+
+ LockstepReverseIterator LRI(Preds);
+ SmallVector<SinkingInstructionCandidate, 4> Candidates;
+ unsigned InstNum = 0, MemoryInstNum = 0;
+ ModelledPHISet NeededPHIs;
+ SmallPtrSet<Value *, 4> PHIContents;
+ analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
+ unsigned NumOrigPHIs = NeededPHIs.size();
+
+ while (LRI.isValid()) {
+ auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
+ NeededPHIs, PHIContents);
+ if (!Cand)
+ break;
+ Cand->calculateCost(NumOrigPHIs, Preds.size());
+ Candidates.emplace_back(*Cand);
+ --LRI;
+ }
+
+ std::stable_sort(
+ Candidates.begin(), Candidates.end(),
+ [](const SinkingInstructionCandidate &A,
+ const SinkingInstructionCandidate &B) { return A >= B; });
+ DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
+ : Candidates) dbgs()
+ << " " << C << "\n";);
+
+ // Pick the top candidate, as long it is positive!
+ if (Candidates.empty() || Candidates.front().Cost <= 0)
+ return 0;
+ auto C = Candidates.front();
+
+ DEBUG(dbgs() << " -- Sinking: " << C << "\n");
+ BasicBlock *InsertBB = BBEnd;
+ if (C.Blocks.size() < NumOrigPreds) {
+ DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs());
+ dbgs() << "\n");
+ InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
+ if (!InsertBB) {
+ DEBUG(dbgs() << " -- FAILED to split edge!\n");
+ // Edge couldn't be split.
+ return 0;
+ }
+ }
+
+ for (unsigned I = 0; I < C.NumInstructions; ++I)
+ sinkLastInstruction(C.Blocks, InsertBB);
+
+ return C.NumInstructions;
+}
+
+void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
+ BasicBlock *BBEnd) {
+ SmallVector<Instruction *, 4> Insts;
+ for (BasicBlock *BB : Blocks)
+ Insts.push_back(BB->getTerminator()->getPrevNode());
+ Instruction *I0 = Insts.front();
+
+ SmallVector<Value *, 4> NewOperands;
+ for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
+ bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
+ return I->getOperand(O) != I0->getOperand(O);
+ });
+ if (!NeedPHI) {
+ NewOperands.push_back(I0->getOperand(O));
+ continue;
+ }
+
+ // Create a new PHI in the successor block and populate it.
+ auto *Op = I0->getOperand(O);
+ assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
+ auto *PN = PHINode::Create(Op->getType(), Insts.size(),
+ Op->getName() + ".sink", &BBEnd->front());
+ for (auto *I : Insts)
+ PN->addIncoming(I->getOperand(O), I->getParent());
+ NewOperands.push_back(PN);
+ }
+
+ // Arbitrarily use I0 as the new "common" instruction; remap its operands
+ // and move it to the start of the successor block.
+ for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
+ I0->getOperandUse(O).set(NewOperands[O]);
+ I0->moveBefore(&*BBEnd->getFirstInsertionPt());
+
+ // Update metadata and IR flags.
+ for (auto *I : Insts)
+ if (I != I0) {
+ combineMetadataForCSE(I0, I);
+ I0->andIRFlags(I);
+ }
+
+ for (auto *I : Insts)
+ if (I != I0)
+ I->replaceAllUsesWith(I0);
+ foldPointlessPHINodes(BBEnd);
+
+ // Finally nuke all instructions apart from the common instruction.
+ for (auto *I : Insts)
+ if (I != I0)
+ I->eraseFromParent();
+
+ NumRemoved += Insts.size() - 1;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Pass machinery / boilerplate
+
+class GVNSinkLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ GVNSinkLegacyPass() : FunctionPass(ID) {
+ initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ GVNSink G;
+ return G.run(F);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+} // namespace
+
+PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
+ GVNSink G;
+ if (!G.run(F))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+
+char GVNSinkLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
+ "Early GVN sinking of Expressions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
+ "Early GVN sinking of Expressions", false, false)
+
+FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index 52201d8f3e5..9fa43da99da 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -48,6 +48,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeEarlyCSELegacyPassPass(Registry);
initializeEarlyCSEMemSSALegacyPassPass(Registry);
initializeGVNHoistLegacyPassPass(Registry);
+ initializeGVNSinkLegacyPassPass(Registry);
initializeFlattenCFGPassPass(Registry);
initializeInductiveRangeCheckEliminationPass(Registry);
initializeIndVarSimplifyLegacyPassPass(Registry);
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index f28ed7c5caf..ebd528bc8ec 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -2109,3 +2109,48 @@ void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(
!F->doesNotAccessMemory())
CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin);
}
+
+bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
+ // We can't have a PHI with a metadata type.
+ if (I->getOperand(OpIdx)->getType()->isMetadataTy())
+ return false;
+
+ // Early exit.
+ if (!isa<Constant>(I->getOperand(OpIdx)))
+ return true;
+
+ switch (I->getOpcode()) {
+ default:
+ return true;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ // Many arithmetic intrinsics have no issue taking a
+ // variable, however it's hard to distingish these from
+ // specials such as @llvm.frameaddress that require a constant.
+ if (isa<IntrinsicInst>(I))
+ return false;
+
+ // Constant bundle operands may need to retain their constant-ness for
+ // correctness.
+ if (ImmutableCallSite(I).isBundleOperand(OpIdx))
+ return false;
+ return true;
+ case Instruction::ShuffleVector:
+ // Shufflevector masks are constant.
+ return OpIdx != 2;
+ case Instruction::ExtractValue:
+ case Instruction::InsertValue:
+ // All operands apart from the first are constant.
+ return OpIdx == 0;
+ case Instruction::Alloca:
+ return false;
+ case Instruction::GetElementPtr:
+ if (OpIdx == 0)
+ return true;
+ gep_type_iterator It = gep_type_begin(I);
+ for (auto E = std::next(It, OpIdx); It != E; ++It)
+ if (It.isStruct())
+ return false;
+ return true;
+ }
+}
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 6441cf89f6b..1b442a9a264 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1376,53 +1376,6 @@ HoistTerminator:
return true;
}
-// Is it legal to place a variable in operand \c OpIdx of \c I?
-// FIXME: This should be promoted to Instruction.
-static bool canReplaceOperandWithVariable(const Instruction *I,
- unsigned OpIdx) {
- // We can't have a PHI with a metadata type.
- if (I->getOperand(OpIdx)->getType()->isMetadataTy())
- return false;
-
- // Early exit.
- if (!isa<Constant>(I->getOperand(OpIdx)))
- return true;
-
- switch (I->getOpcode()) {
- default:
- return true;
- case Instruction::Call:
- case Instruction::Invoke:
- // FIXME: many arithmetic intrinsics have no issue taking a
- // variable, however it's hard to distingish these from
- // specials such as @llvm.frameaddress that require a constant.
- if (isa<IntrinsicInst>(I))
- return false;
-
- // Constant bundle operands may need to retain their constant-ness for
- // correctness.
- if (ImmutableCallSite(I).isBundleOperand(OpIdx))
- return false;
-
- return true;
-
- case Instruction::ShuffleVector:
- // Shufflevector masks are constant.
- return OpIdx != 2;
- case Instruction::ExtractValue:
- case Instruction::InsertValue:
- // All operands apart from the first are constant.
- return OpIdx == 0;
- case Instruction::Alloca:
- return false;
- case Instruction::GetElementPtr:
- if (OpIdx == 0)
- return true;
- gep_type_iterator It = std::next(gep_type_begin(I), OpIdx - 1);
- return It.isSequential();
- }
-}
-
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
diff --git a/llvm/test/Transforms/GVNSink/dither.ll b/llvm/test/Transforms/GVNSink/dither.ll
new file mode 100644
index 00000000000..9717021aca8
--- /dev/null
+++ b/llvm/test/Transforms/GVNSink/dither.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -S -gvn-sink | FileCheck %s
+
+; Because %tmp17 has flipped operands to its equivalents %tmp14 and %tmp7, we
+; can't sink the zext as we'd need a shuffling PHI in between.
+;
+; Just sinking the zext isn't profitable, so ensure nothing is sunk.
+
+; CHECK-LABEL: @hoge
+; CHECK-NOT: bb18.gvnsink.split
+define void @hoge() {
+bb:
+ br i1 undef, label %bb4, label %bb11
+
+bb4: ; preds = %bb3
+ br i1 undef, label %bb6, label %bb8
+
+bb6: ; preds = %bb5
+ %tmp = zext i16 undef to i64
+ %tmp7 = add i64 %tmp, undef
+ br label %bb18
+
+bb8: ; preds = %bb5
+ %tmp9 = zext i16 undef to i64
+ br label %bb18
+
+bb11: ; preds = %bb10
+ br i1 undef, label %bb12, label %bb15
+
+bb12: ; preds = %bb11
+ %tmp13 = zext i16 undef to i64
+ %tmp14 = add i64 %tmp13, undef
+ br label %bb18
+
+bb15: ; preds = %bb11
+ %tmp16 = zext i16 undef to i64
+ %tmp17 = add i64 undef, %tmp16
+ br label %bb18
+
+bb18: ; preds = %bb15, %bb12, %bb8, %bb6
+ %tmp19 = phi i64 [ %tmp7, %bb6 ], [ undef, %bb8 ], [ %tmp14, %bb12 ], [ %tmp17, %bb15 ]
+ unreachable
+}
diff --git a/llvm/test/Transforms/GVNSink/indirect-call.ll b/llvm/test/Transforms/GVNSink/indirect-call.ll
new file mode 100644
index 00000000000..da98ed0819a
--- /dev/null
+++ b/llvm/test/Transforms/GVNSink/indirect-call.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -gvn-sink -simplifycfg -simplifycfg-sink-common=false -S | FileCheck %s
+
+declare i8 @ext(i1)
+
+define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext) {
+entry:
+ %cmp = icmp uge i32 %blksA, %nblks
+ br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test1
+; CHECK: call i8 @ext
+; CHECK: call i8 %ext
+if.then:
+ %frombool1 = call i8 @ext(i1 %cmp)
+ br label %if.end
+
+if.else:
+ %frombool3 = call i8 %ext(i1 %cmp)
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+define zeroext i1 @test2(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext) {
+entry:
+ %cmp = icmp uge i32 %blksA, %nblks
+ br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test2
+; CHECK: call i8 %ext
+; CHECK-NOT: call
+if.then:
+ %frombool1 = call i8 %ext(i1 %cmp)
+ br label %if.end
+
+if.else:
+ %frombool3 = call i8 %ext(i1 %cmp)
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+define zeroext i1 @test3(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext1, i8(i1)* %ext2) {
+entry:
+ %cmp = icmp uge i32 %blksA, %nblks
+ br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test3
+; CHECK: %[[x:.*]] = select i1 %flag, i8 (i1)* %ext1, i8 (i1)* %ext2
+; CHECK: call i8 %[[x]](i1 %cmp)
+; CHECK-NOT: call
+if.then:
+ %frombool1 = call i8 %ext1(i1 %cmp)
+ br label %if.end
+
+if.else:
+ %frombool3 = call i8 %ext2(i1 %cmp)
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
diff --git a/llvm/test/Transforms/GVNSink/sink-common-code.ll b/llvm/test/Transforms/GVNSink/sink-common-code.ll
new file mode 100644
index 00000000000..d9e757cd10f
--- /dev/null
+++ b/llvm/test/Transforms/GVNSink/sink-common-code.ll
@@ -0,0 +1,694 @@
+; RUN: opt < %s -gvn-sink -simplifycfg -simplifycfg-sink-common=false -S | FileCheck %s
+
+define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test1
+; CHECK: add
+; CHECK: select
+; CHECK: icmp
+; CHECK-NOT: br
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ br label %if.end
+
+if.else:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = zext i1 %cmp2 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+define zeroext i1 @test2(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test2
+; CHECK: add
+; CHECK: select
+; CHECK: icmp
+; CHECK-NOT: br
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ br label %if.end
+
+if.else:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp uge i32 %blksA, %add
+ %frombool3 = zext i1 %cmp2 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+declare i32 @foo(i32, i32) nounwind readnone
+
+define i32 @test3(i1 zeroext %flag, i32 %x, i32 %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+ %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+ br label %if.end
+
+if.else:
+ %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+ %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+ br label %if.end
+
+if.end:
+ %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+ %yy = phi i32 [ %y0, %if.then ], [ %y1, %if.else ]
+ %ret = add i32 %xx, %yy
+ ret i32 %ret
+}
+
+; CHECK-LABEL: test3
+; CHECK: select
+; CHECK: call
+; CHECK: call
+; CHECK: add
+; CHECK-NOT: br
+
+define i32 @test4(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %a = add i32 %x, 5
+ store i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %b = add i32 %x, 7
+ store i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test4
+; CHECK: select
+; CHECK: store
+; CHECK-NOT: store
+
+define i32 @test5(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %a = add i32 %x, 5
+ store volatile i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %b = add i32 %x, 7
+ store i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test5
+; CHECK: store volatile
+; CHECK: store
+
+define i32 @test6(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %a = add i32 %x, 5
+ store volatile i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %b = add i32 %x, 7
+ store volatile i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test6
+; CHECK: select
+; CHECK: store volatile
+; CHECK-NOT: store
+
+define i32 @test7(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %z = load volatile i32, i32* %y
+ %a = add i32 %z, 5
+ store volatile i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %w = load volatile i32, i32* %y
+ %b = add i32 %w, 7
+ store volatile i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test7
+; CHECK-DAG: select
+; CHECK-DAG: load volatile
+; CHECK: store volatile
+; CHECK-NOT: load
+; CHECK-NOT: store
+
+; The extra store in %if.then means %z and %w are not equivalent.
+define i32 @test9(i1 zeroext %flag, i32 %x, i32* %y, i32* %p) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ store i32 7, i32* %p
+ %z = load volatile i32, i32* %y
+ store i32 6, i32* %p
+ %a = add i32 %z, 5
+ store volatile i32 %a, i32* %y
+ br label %if.end
+
+if.else:
+ %w = load volatile i32, i32* %y
+ %b = add i32 %w, 7
+ store volatile i32 %b, i32* %y
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test9
+; CHECK: add
+; CHECK: add
+
+%struct.anon = type { i32, i32 }
+
+; The GEP indexes a struct type so cannot have a variable last index.
+define i32 @test10(i1 zeroext %flag, i32 %x, i32* %y, %struct.anon* %s) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %dummy = add i32 %x, 5
+ %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0
+ store volatile i32 %x, i32* %gepa
+ br label %if.end
+
+if.else:
+ %dummy1 = add i32 %x, 6
+ %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+ store volatile i32 %x, i32* %gepb
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+; CHECK-LABEL: test10
+; CHECK: getelementptr
+; CHECK: store volatile
+; CHECK: getelementptr
+; CHECK: store volatile
+
+; The shufflevector's mask operand cannot be merged in a PHI.
+define i32 @test11(i1 zeroext %flag, i32 %w, <2 x i32> %x, <2 x i32> %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %dummy = add i32 %w, 5
+ %sv1 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 0, i32 1>
+ br label %if.end
+
+if.else:
+ %dummy1 = add i32 %w, 6
+ %sv2 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 1, i32 0>
+ br label %if.end
+
+if.end:
+ %p = phi <2 x i32> [ %sv1, %if.then ], [ %sv2, %if.else ]
+ ret i32 1
+}
+
+; CHECK-LABEL: test11
+; CHECK: shufflevector
+; CHECK: shufflevector
+
+; We can't common an intrinsic!
+define i32 @test12(i1 zeroext %flag, i32 %w, i32 %x, i32 %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %dummy = add i32 %w, 5
+ %sv1 = call i32 @llvm.ctlz.i32(i32 %x)
+ br label %if.end
+
+if.else:
+ %dummy1 = add i32 %w, 6
+ %sv2 = call i32 @llvm.cttz.i32(i32 %x)
+ br label %if.end
+
+if.end:
+ %p = phi i32 [ %sv1, %if.then ], [ %sv2, %if.else ]
+ ret i32 1
+}
+
+declare i32 @llvm.ctlz.i32(i32 %x) readnone
+declare i32 @llvm.cttz.i32(i32 %x) readnone
+
+; CHECK-LABEL: test12
+; CHECK: call i32 @llvm.ctlz
+; CHECK: call i32 @llvm.cttz
+
+; The TBAA metadata should be properly combined.
+define i32 @test13(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %z = load volatile i32, i32* %y
+ %a = add i32 %z, 5
+ store volatile i32 %a, i32* %y, !tbaa !3
+ br label %if.end
+
+if.else:
+ %w = load volatile i32, i32* %y
+ %b = add i32 %w, 7
+ store volatile i32 %b, i32* %y, !tbaa !4
+ br label %if.end
+
+if.end:
+ ret i32 1
+}
+
+!0 = !{ !"an example type tree" }
+!1 = !{ !"int", !0 }
+!2 = !{ !"float", !0 }
+!3 = !{ !"const float", !2, i64 0 }
+!4 = !{ !"special float", !2, i64 1 }
+
+; CHECK-LABEL: test13
+; CHECK-DAG: select
+; CHECK-DAG: load volatile
+; CHECK: store volatile {{.*}}, !tbaa !0
+; CHECK-NOT: load
+; CHECK-NOT: store
+
+; The call should be commoned.
+define i32 @test13a(i1 zeroext %flag, i32 %w, i32 %x, i32 %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %sv1 = call i32 @bar(i32 %x)
+ br label %if.end
+
+if.else:
+ %sv2 = call i32 @bar(i32 %y)
+ br label %if.end
+
+if.end:
+ %p = phi i32 [ %sv1, %if.then ], [ %sv2, %if.else ]
+ ret i32 1
+}
+declare i32 @bar(i32)
+
+; CHECK-LABEL: test13a
+; CHECK: %[[x:.*]] = select i1 %flag
+; CHECK: call i32 @bar(i32 %[[x]])
+
+; The load should be commoned.
+define i32 @test14(i1 zeroext %flag, i32 %w, i32 %x, i32 %y, %struct.anon* %s) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %dummy = add i32 %x, 1
+ %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+ %sv1 = load i32, i32* %gepa
+ %cmp1 = icmp eq i32 %sv1, 56
+ br label %if.end
+
+if.else:
+ %dummy2 = add i32 %x, 4
+ %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+ %sv2 = load i32, i32* %gepb
+ %cmp2 = icmp eq i32 %sv2, 57
+ br label %if.end
+
+if.end:
+ %p = phi i1 [ %cmp1, %if.then ], [ %cmp2, %if.else ]
+ ret i32 1
+}
+
+; CHECK-LABEL: test14
+; CHECK: getelementptr
+; CHECK: load
+; CHECK-NOT: load
+
+; The load should be commoned.
+define i32 @test15(i1 zeroext %flag, i32 %w, i32 %x, i32 %y, %struct.anon* %s) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %dummy = add i32 %x, 1
+ %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0
+ %sv1 = load i32, i32* %gepa
+ %ext1 = zext i32 %sv1 to i64
+ %cmp1 = icmp eq i64 %ext1, 56
+ br label %if.end
+
+if.else:
+ %dummy2 = add i32 %x, 4
+ %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+ %sv2 = load i32, i32* %gepb
+ %ext2 = zext i32 %sv2 to i64
+ %cmp2 = icmp eq i64 %ext2, 56
+ br label %if.end
+
+if.end:
+ %p = phi i1 [ %cmp1, %if.then ], [ %cmp2, %if.else ]
+ ret i32 1
+}
+
+; CHECK-LABEL: test15
+; CHECK: getelementptr
+; CHECK: load
+; CHECK-NOT: load
+
+define zeroext i1 @test_crash(i1 zeroext %flag, i32* %i4, i32* %m, i32* %n) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %tmp1 = load i32, i32* %i4
+ %tmp2 = add i32 %tmp1, -1
+ store i32 %tmp2, i32* %i4
+ br label %if.end
+
+if.else:
+ %tmp3 = load i32, i32* %m
+ %tmp4 = load i32, i32* %n
+ %tmp5 = add i32 %tmp3, %tmp4
+ store i32 %tmp5, i32* %i4
+ br label %if.end
+
+if.end:
+ ret i1 true
+}
+
+; CHECK-LABEL: test_crash
+; No checks for test_crash - just ensure it doesn't crash!
+
+define zeroext i1 @test16(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) {
+
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ br label %if.end
+
+if.else:
+ br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = zext i1 %cmp2 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+; CHECK-LABEL: test16
+; CHECK: zext
+; CHECK: zext
+
+define zeroext i1 @test16a(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks, i8* %p) {
+
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ %b1 = sext i8 %frombool1 to i32
+ %b2 = trunc i32 %b1 to i8
+ store i8 %b2, i8* %p
+ br label %if.end
+
+if.else:
+ br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = zext i1 %cmp2 to i8
+ %a1 = sext i8 %frombool3 to i32
+ %a2 = trunc i32 %a1 to i8
+ store i8 %a2, i8* %p
+ br label %if.end
+
+if.end:
+ ret i1 true
+}
+
+; CHECK-LABEL: test16a
+; CHECK: zext
+; CHECK-NOT: zext
+
+define zeroext i1 @test17(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+ switch i32 %flag, label %if.end [
+ i32 0, label %if.then
+ i32 1, label %if.then2
+ ]
+
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = call i8 @i1toi8(i1 %cmp)
+ %a1 = sext i8 %frombool1 to i32
+ %a2 = trunc i32 %a1 to i8
+ br label %if.end
+
+if.then2:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = call i8 @i1toi8(i1 %cmp2)
+ %b1 = sext i8 %frombool3 to i32
+ %b2 = trunc i32 %b1 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %a2, %if.then ], [ %b2, %if.then2 ], [ 0, %entry ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+declare i8 @i1toi8(i1)
+
+; FIXME: DISABLED - we don't consider this profitable. We should
+; - Consider argument setup/return mov'ing for calls, like InlineCost does.
+; - Consider the removal of the %obeys.0 PHI (zero PHI movement overall)
+
+; DISABLED-CHECK-LABEL: test17
+; DISABLED-CHECK: if.then:
+; DISABLED-CHECK-NEXT: icmp uge
+; DISABLED-CHECK-NEXT: br label %[[x:.*]]
+
+; DISABLED-CHECK: if.then2:
+; DISABLED-CHECK-NEXT: add
+; DISABLED-CHECK-NEXT: icmp ule
+; DISABLED-CHECK-NEXT: br label %[[x]]
+
+; DISABLED-CHECK: [[x]]:
+; DISABLED-CHECK-NEXT: %[[y:.*]] = phi i1 [ %cmp
+; DISABLED-CHECK-NEXT: %[[z:.*]] = call i8 @i1toi8(i1 %[[y]])
+; DISABLED-CHECK-NEXT: br label %if.end
+
+; DISABLED-CHECK: if.end:
+; DISABLED-CHECK-NEXT: phi i8
+; DISABLED-CHECK-DAG: [ %[[z]], %[[x]] ]
+; DISABLED-CHECK-DAG: [ 0, %entry ]
+
+define zeroext i1 @test18(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+ switch i32 %flag, label %if.then3 [
+ i32 0, label %if.then
+ i32 1, label %if.then2
+ ]
+
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ br label %if.end
+
+if.then2:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = zext i1 %cmp2 to i8
+ br label %if.end
+
+if.then3:
+ %add2 = add i32 %nblks, %blksA
+ %cmp3 = icmp ule i32 %add2, %blksA
+ %frombool4 = zext i1 %cmp3 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ %frombool4, %if.then3 ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+; CHECK-LABEL: test18
+; CHECK: if.end:
+; CHECK-NEXT: %[[x:.*]] = phi i1
+; CHECK-DAG: [ %cmp, %if.then ]
+; CHECK-DAG: [ %cmp2, %if.then2 ]
+; CHECK-DAG: [ %cmp3, %if.then3 ]
+; CHECK-NEXT: zext i1 %[[x]] to i8
+
+; The phi is confusing - both add instructions are used by it, but
+; not on their respective unconditional arcs. It should not be
+; optimized.
+define void @test_pr30292(i1 %cond, i1 %cond2, i32 %a, i32 %b) {
+entry:
+ %add1 = add i32 %a, 1
+ br label %succ
+
+one:
+ br i1 %cond, label %two, label %succ
+
+two:
+ call void @g()
+ %add2 = add i32 %a, 1
+ br label %succ
+
+succ:
+ %p = phi i32 [ 0, %entry ], [ %add1, %one ], [ %add2, %two ]
+ br label %one
+}
+declare void @g()
+
+; CHECK-LABEL: test_pr30292
+; CHECK: phi i32 [ 0, %entry ], [ %add1, %succ ], [ %add2, %two ]
+
+define zeroext i1 @test_pr30244(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) {
+
+entry:
+ %p = alloca i8
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ store i8 %frombool1, i8* %p
+ br label %if.end
+
+if.else:
+ br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = zext i1 %cmp2 to i8
+ store i8 %frombool3, i8* %p
+ br label %if.end
+
+if.end:
+ ret i1 true
+}
+
+; CHECK-LABEL: @test_pr30244
+; CHECK: store
+; CHECK-NOT: store
+
+define i32 @test_pr30373a(i1 zeroext %flag, i32 %x, i32 %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+ %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+ %z0 = lshr i32 %y0, 8
+ br label %if.end
+
+if.else:
+ %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+ %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+ %z1 = lshr exact i32 %y1, 8
+ br label %if.end
+
+if.end:
+ %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+ %yy = phi i32 [ %z0, %if.then ], [ %z1, %if.else ]
+ %ret = add i32 %xx, %yy
+ ret i32 %ret
+}
+
+; CHECK-LABEL: test_pr30373a
+; CHECK: lshr
+; CHECK-NOT: exact
+; CHECK: }
+
+define i32 @test_pr30373b(i1 zeroext %flag, i32 %x, i32 %y) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+ %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+ %z0 = lshr exact i32 %y0, 8
+ br label %if.end
+
+if.else:
+ %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+ %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+ %z1 = lshr i32 %y1, 8
+ br label %if.end
+
+if.end:
+ %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+ %yy = phi i32 [ %z0, %if.then ], [ %z1, %if.else ]
+ %ret = add i32 %xx, %yy
+ ret i32 %ret
+}
+
+; CHECK-LABEL: test_pr30373b
+; CHECK: lshr
+; CHECK-NOT: exact
+; CHECK: }
+
+; CHECK: !0 = !{!1, !1, i64 0}
+; CHECK: !1 = !{!"float", !2}
+; CHECK: !2 = !{!"an example type tree"}
diff --git a/llvm/test/Transforms/GVNSink/struct.ll b/llvm/test/Transforms/GVNSink/struct.ll
new file mode 100644
index 00000000000..2228cf2803a
--- /dev/null
+++ b/llvm/test/Transforms/GVNSink/struct.ll
@@ -0,0 +1,71 @@
+; RUN: opt -gvn-sink -S < %s | FileCheck %s
+
+%struct = type {i32, i32}
+%struct2 = type { [ 2 x i32], i32 }
+
+; Struct indices cannot be variant.
+
+; CHECK-LABEL: @f() {
+; CHECK: getelementptr
+; CHECK: getelementptr
+define void @f() {
+bb:
+ br i1 undef, label %bb2, label %bb1
+
+bb1: ; preds = %bb
+ %tmp = getelementptr inbounds %struct, %struct* null, i64 0, i32 1
+ br label %bb4
+
+bb2: ; preds = %bb
+ %tmp3 = getelementptr inbounds %struct, %struct* null, i64 0, i32 0
+ br label %bb4
+
+bb4: ; preds = %bb2, %bb1
+ %tmp5 = phi i32 [ 1, %bb1 ], [ 0, %bb2 ]
+ ret void
+}
+
+; Struct indices cannot be variant.
+
+; CHECK-LABEL: @g() {
+; CHECK: getelementptr
+; CHECK: getelementptr
+define void @g() {
+bb:
+ br i1 undef, label %bb2, label %bb1
+
+bb1: ; preds = %bb
+ %tmp = getelementptr inbounds %struct2, %struct2* null, i64 0, i32 0, i32 1
+ br label %bb4
+
+bb2: ; preds = %bb
+ %tmp3 = getelementptr inbounds %struct2, %struct2* null, i64 0, i32 0, i32 0
+ br label %bb4
+
+bb4: ; preds = %bb2, %bb1
+ %tmp5 = phi i32 [ 1, %bb1 ], [ 0, %bb2 ]
+ ret void
+}
+
+
+; ... but the first parameter to a GEP can.
+
+; CHECK-LABEL: @h() {
+; CHECK: getelementptr
+; CHECK-NOT: getelementptr
+define void @h() {
+bb:
+ br i1 undef, label %bb2, label %bb1
+
+bb1: ; preds = %bb
+ %tmp = getelementptr inbounds %struct, %struct* null, i32 0, i32 0
+ br label %bb4
+
+bb2: ; preds = %bb
+ %tmp3 = getelementptr inbounds %struct, %struct* null, i32 1, i32 0
+ br label %bb4
+
+bb4: ; preds = %bb2, %bb1
+ %tmp5 = phi i32 [ 0, %bb1 ], [ 1, %bb2 ]
+ ret void
+} \ No newline at end of file
OpenPOWER on IntegriCloud