summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAmjad Aboud <amjad.aboud@intel.com>2018-01-24 12:42:42 +0000
committerAmjad Aboud <amjad.aboud@intel.com>2018-01-24 12:42:42 +0000
commite4453233d78788989c4bf2ff927a9e67433fb63d (patch)
tree852418eb4b403be8210679108562c8ddd6fb0033
parentf26df4783132de2a534572a53847716a89d98339 (diff)
downloadbcm5719-llvm-e4453233d78788989c4bf2ff927a9e67433fb63d.tar.gz
bcm5719-llvm-e4453233d78788989c4bf2ff927a9e67433fb63d.zip
[InstCombine] Introducing Aggressive Instruction Combine pass (-aggressive-instcombine).
Combine expression patterns to form expressions with fewer, simple instructions. This pass does not modify the CFG. For example, this pass reduces the width of expressions post-dominated by TruncInst into a smaller width when applicable. It differs from the instcombine pass in that it contains pattern optimizations that require higher complexity than O(1); thus, it should run fewer times than the instcombine pass. Differential Revision: https://reviews.llvm.org/D38313 llvm-svn: 323321
-rw-r--r--llvm/docs/Passes.rst15
-rw-r--r--llvm/include/llvm/InitializePasses.h1
-rw-r--r--llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h34
-rw-r--r--llvm/include/llvm/Transforms/Scalar.h7
-rw-r--r--llvm/lib/LTO/LLVMBuild.txt1
-rw-r--r--llvm/lib/Passes/LLVMBuild.txt2
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp5
-rw-r--r--llvm/lib/Passes/PassRegistry.def1
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp110
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h119
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/CMakeLists.txt11
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/LLVMBuild.txt22
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp401
-rw-r--r--llvm/lib/Transforms/CMakeLists.txt1
-rw-r--r--llvm/lib/Transforms/IPO/LLVMBuild.txt2
-rw-r--r--llvm/lib/Transforms/IPO/PassManagerBuilder.cpp4
-rw-r--r--llvm/lib/Transforms/LLVMBuild.txt2
-rw-r--r--llvm/lib/Transforms/Scalar/LLVMBuild.txt2
-rw-r--r--llvm/test/Other/new-pm-defaults.ll1
-rw-r--r--llvm/test/Other/new-pm-lto-defaults.ll6
-rw-r--r--llvm/test/Other/new-pm-thinlto-defaults.ll1
-rw-r--r--llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll214
-rw-r--r--llvm/tools/opt/CMakeLists.txt1
-rw-r--r--llvm/tools/opt/opt.cpp1
24 files changed, 958 insertions, 6 deletions
diff --git a/llvm/docs/Passes.rst b/llvm/docs/Passes.rst
index 77461f3c52d..39874e4c8a0 100644
--- a/llvm/docs/Passes.rst
+++ b/llvm/docs/Passes.rst
@@ -641,6 +641,21 @@ not library calls are simplified is controlled by the
:ref:`-functionattrs <passes-functionattrs>` pass and LLVM's knowledge of
library calls on different targets.
+.. _passes-aggressive-instcombine:
+
+``-aggressive-instcombine``: Combine expression patterns
+--------------------------------------------------------
+
+Combine expression patterns to form expressions with fewer, simple instructions.
+This pass does not modify the CFG.
+
+For example, this pass reduces the width of expressions post-dominated by
+TruncInst into a smaller width when applicable.
+
+It differs from the instcombine pass in that it contains pattern optimizations
+that require higher complexity than O(1); thus, it should run fewer times than
+the instcombine pass.
+
``-internalize``: Internalize Global Symbols
--------------------------------------------
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index e8f152feff7..92cd1edcdc1 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -64,6 +64,7 @@ void initializeADCELegacyPassPass(PassRegistry&);
void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&);
void initializeAddressSanitizerModulePass(PassRegistry&);
void initializeAddressSanitizerPass(PassRegistry&);
+void initializeAggressiveInstCombinerLegacyPassPass(PassRegistry&);
void initializeAliasSetPrinterPass(PassRegistry&);
void initializeAlignmentFromAssumptionsPass(PassRegistry&);
void initializeAlwaysInlinerLegacyPassPass(PassRegistry&);
diff --git a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
new file mode 100644
index 00000000000..a318d18ab31
--- /dev/null
+++ b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
@@ -0,0 +1,34 @@
+//===- AggressiveInstCombine.h - AggressiveInstCombine pass -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides the primary interface to the aggressive instcombine pass.
+/// This pass is suitable for use in the new pass manager. For a pass that works
+/// with the legacy pass manager, please look for
+/// \c createAggressiveInstCombinerPass() in Scalar.h.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_AGGRESSIVE_INSTCOMBINE_INSTCOMBINE_H
+#define LLVM_TRANSFORMS_AGGRESSIVE_INSTCOMBINE_INSTCOMBINE_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class AggressiveInstCombinePass
+ : public PassInfoMixin<AggressiveInstCombinePass> {
+
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+}
+
+#endif
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 49186bc5cd6..869db23ed6e 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -142,6 +142,13 @@ FunctionPass *createInstructionCombiningPass(bool ExpensiveCombines = true);
//===----------------------------------------------------------------------===//
//
+// AggressiveInstCombiner - Combine expression patterns to form expressions with
+// fewer, simple instructions. This pass does not modify the CFG.
+//
+FunctionPass *createAggressiveInstCombinerPass();
+
+//===----------------------------------------------------------------------===//
+//
// LICM - This pass is a loop invariant code motion and memory promotion pass.
//
Pass *createLICMPass();
diff --git a/llvm/lib/LTO/LLVMBuild.txt b/llvm/lib/LTO/LLVMBuild.txt
index b940362e523..a1993314e36 100644
--- a/llvm/lib/LTO/LLVMBuild.txt
+++ b/llvm/lib/LTO/LLVMBuild.txt
@@ -20,6 +20,7 @@ type = Library
name = LTO
parent = Libraries
required_libraries =
+ AggressiveInstCombine
Analysis
BitReader
BitWriter
diff --git a/llvm/lib/Passes/LLVMBuild.txt b/llvm/lib/Passes/LLVMBuild.txt
index e2378a84328..d6e9bf1fcf3 100644
--- a/llvm/lib/Passes/LLVMBuild.txt
+++ b/llvm/lib/Passes/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = Passes
parent = Libraries
-required_libraries = Analysis CodeGen Core IPO InstCombine Scalar Support Target TransformUtils Vectorize Instrumentation
+required_libraries = AggressiveInstCombine Analysis CodeGen Core IPO InstCombine Scalar Support Target TransformUtils Vectorize Instrumentation
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 8ea51e04afe..89570f7d153 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -59,6 +59,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Regex.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "llvm/Transforms/GCOVProfiler.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
@@ -363,6 +364,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());
FPM.addPass(SimplifyCFGPass());
+ if (Level == O3)
+ FPM.addPass(AggressiveInstCombinePass());
FPM.addPass(InstCombinePass());
if (!isOptimizingForSize(Level))
@@ -1010,6 +1013,8 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// function pointers. When this happens, we often have to resolve varargs
// calls, etc, so let instcombine do this.
FunctionPassManager PeepholeFPM(DebugLogging);
+ if (Level == O3)
+ PeepholeFPM.addPass(AggressiveInstCombinePass());
PeepholeFPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(PeepholeFPM, Level);
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 9ac95ee6fa8..bebeb8f2a7e 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -139,6 +139,7 @@ FUNCTION_ALIAS_ANALYSIS("type-based-aa", TypeBasedAA())
FUNCTION_PASS("aa-eval", AAEvaluator())
FUNCTION_PASS("adce", ADCEPass())
FUNCTION_PASS("add-discriminators", AddDiscriminatorsPass())
+FUNCTION_PASS("aggressive-instcombine", AggressiveInstCombinePass())
FUNCTION_PASS("alignment-from-assumptions", AlignmentFromAssumptionsPass())
FUNCTION_PASS("bdce", BDCEPass())
FUNCTION_PASS("bounds-checking", BoundsCheckingPass())
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
new file mode 100644
index 00000000000..b7364d2ecdf
--- /dev/null
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -0,0 +1,110 @@
+//===- AggressiveInstCombine.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the aggressive expression pattern combiner classes.
+// Currently, it handles expression patterns for:
+// * Truncate instruction
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
+#include "AggressiveInstCombineInternal.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "aggressive-instcombine"
+
+namespace {
+/// Contains expression pattern combiner logic.
+/// This class provides both the logic to combine expression patterns and
+/// combine them. It differs from InstCombiner class in that each pattern
+/// combiner runs only once as opposed to InstCombine's multi-iteration,
+/// which allows pattern combiner to have higher complexity than the O(1)
+/// required by the instruction combiner.
+class AggressiveInstCombinerLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ AggressiveInstCombinerLegacyPass() : FunctionPass(ID) {
+ initializeAggressiveInstCombinerLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// Run all expression pattern optimizations on the given \p F function.
+ ///
+ /// \param F function to optimize.
+ /// \returns true if the IR is changed.
+ bool runOnFunction(Function &F) override;
+};
+} // namespace
+
+void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+}
+
+bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &DL = F.getParent()->getDataLayout();
+
+ bool MadeIRChange = false;
+
+ // Handle TruncInst patterns
+ TruncInstCombine TIC(TLI, DL);
+ MadeIRChange |= TIC.run(F);
+
+ // TODO: add more patterns to handle...
+
+ return MadeIRChange;
+}
+
+PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &DL = F.getParent()->getDataLayout();
+ bool MadeIRChange = false;
+
+ // Handle TruncInst patterns
+ TruncInstCombine TIC(TLI, DL);
+ MadeIRChange |= TIC.run(F);
+ if (!MadeIRChange)
+ // No changes, all analyses are preserved.
+ return PreservedAnalyses::all();
+
+ // Mark all the analyses that instcombine updates as preserved.
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<AAManager>();
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+
+char AggressiveInstCombinerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(AggressiveInstCombinerLegacyPass,
+ "aggressive-instcombine",
+ "Combine pattern based expressions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(AggressiveInstCombinerLegacyPass, "aggressive-instcombine",
+ "Combine pattern based expressions", false, false)
+
+FunctionPass *llvm::createAggressiveInstCombinerPass() {
+ return new AggressiveInstCombinerLegacyPass();
+}
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
new file mode 100644
index 00000000000..63255dd28f8
--- /dev/null
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
@@ -0,0 +1,119 @@
+//===- AggressiveInstCombineInternal.h --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the instruction pattern combiner classes.
+// Currently, it handles pattern expressions for:
+// * Truncate instruction
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// TruncInstCombine - looks for expression dags dominated by trunc instructions
+// and for each eligible dag, it will create a reduced bit-width expression and
+// replace the old expression with this new one and remove the old one.
+// Eligible expression dag is such that:
+// 1. Contains only supported instructions.
+// 2. Supported leaves: ZExtInst, SExtInst, TruncInst and Constant value.
+// 3. Can be evaluated into type with reduced legal bit-width (or Trunc type).
+// 4. All instructions in the dag must not have users outside the dag.
+// Only exception is for {ZExt, SExt}Inst with operand type equal to the
+// new reduced type chosen in (3).
+//
+// The motivation for this optimization is that evaluating an expression using
+// smaller bit-width is preferable, especially for vectorization where we can
+// fit more values in one vectorized instruction. In addition, this optimization
+// may decrease the number of cast instructions, but will not increase it.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+ class DataLayout;
+ class TargetLibraryInfo;
+
+class TruncInstCombine {
+ TargetLibraryInfo &TLI;
+ const DataLayout &DL;
+
+ /// List of all TruncInst instructions to be processed.
+ SmallVector<TruncInst *, 4> Worklist;
+
+ /// Current processed TruncInst instruction.
+ TruncInst *CurrentTruncInst;
+
+ /// Information per each instruction in the expression dag.
+ struct Info {
+ /// Number of LSBs that are needed to generate a valid expression.
+ unsigned ValidBitWidth = 0;
+ /// Minimum number of LSBs needed to generate the ValidBitWidth.
+ unsigned MinBitWidth = 0;
+ /// The reduced value generated to replace the old instruction.
+ Value *NewValue = nullptr;
+ };
+ /// An ordered map representing expression dag post-dominated by current
+ /// processed TruncInst. It maps each instruction in the dag to its Info
+ /// structure. The map is ordered such that each instruction appears before
+ /// all other instructions in the dag that uses it.
+ MapVector<Instruction *, Info> InstInfoMap;
+
+public:
+ TruncInstCombine(TargetLibraryInfo &TLI, const DataLayout &DL)
+ : TLI(TLI), DL(DL), CurrentTruncInst(nullptr) {}
+
+ /// Perform TruncInst pattern optimization on given function.
+ bool run(Function &F);
+
+private:
+ /// Build expression dag dominated by the \p CurrentTruncInst and append it to
+ /// the InstInfoMap container.
+ ///
+ /// \return true only if succeed to generate an eligible sub expression dag.
+ bool buildTruncExpressionDag();
+
+ /// Calculate the minimal allowed bit-width of the chain ending with the
+ /// currently visited truncate's operand.
+ ///
+ /// \return minimum number of bits to which the chain ending with the
+ /// truncate's operand can be shrunk to.
+ unsigned getMinBitWidth();
+
+ /// Build an expression dag dominated by the current processed TruncInst and
+ /// Check if it is eligible to be reduced to a smaller type.
+ ///
+ /// \return the scalar version of the new type to be used for the reduced
+ /// expression dag, or nullptr if the expression dag is not eligible
+ /// to be reduced.
+ Type *getBestTruncatedType();
+
+ /// Given a \p V value and a \p SclTy scalar type return the generated reduced
+ /// value of \p V based on the type \p SclTy.
+ ///
+ /// \param V value to be reduced.
+ /// \param SclTy scalar version of new type to reduce to.
+ /// \return the new reduced value.
+ Value *getReducedOperand(Value *V, Type *SclTy);
+
+ /// Create a new expression dag using the reduced \p SclTy type and replace
+ /// the old expression dag with it. Also erase all instructions in the old
+ /// dag, except those that are still needed outside the dag.
+ ///
+ /// \param SclTy scalar version of new type to reduce expression dag into.
+ void ReduceExpressionDag(Type *SclTy);
+};
+} // end namespace llvm.
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/CMakeLists.txt b/llvm/lib/Transforms/AggressiveInstCombine/CMakeLists.txt
new file mode 100644
index 00000000000..386314801e3
--- /dev/null
+++ b/llvm/lib/Transforms/AggressiveInstCombine/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_llvm_library(LLVMAggressiveInstCombine
+ AggressiveInstCombine.cpp
+ TruncInstCombine.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/AggressiveInstCombine
+
+ DEPENDS
+ intrinsics_gen
+ )
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/LLVMBuild.txt b/llvm/lib/Transforms/AggressiveInstCombine/LLVMBuild.txt
new file mode 100644
index 00000000000..c05844f33de
--- /dev/null
+++ b/llvm/lib/Transforms/AggressiveInstCombine/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Transforms/AggressiveInstCombine/LLVMBuild.txt -----*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AggressiveInstCombine
+parent = Transforms
+required_libraries = Analysis Core Support TransformUtils
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
new file mode 100644
index 00000000000..55dcb539dbb
--- /dev/null
+++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -0,0 +1,401 @@
+//===- TruncInstCombine.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TruncInstCombine - looks for expression dags post-dominated by TruncInst and
+// for each eligible dag, it will create a reduced bit-width expression, replace
+// the old expression with this new one and remove the old expression.
+// Eligible expression dag is such that:
+// 1. Contains only supported instructions.
+// 2. Supported leaves: ZExtInst, SExtInst, TruncInst and Constant value.
+// 3. Can be evaluated into type with reduced legal bit-width.
+// 4. All instructions in the dag must not have users outside the dag.
+// The only exception is for {ZExt, SExt}Inst with operand type equal to
+// the new reduced type evaluated in (3).
+//
+// The motivation for this optimization is that evaluating an expression using
+// smaller bit-width is preferable, especially for vectorization where we can
+// fit more values in one vectorized instruction. In addition, this optimization
+// may decrease the number of cast instructions, but will not increase it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AggressiveInstCombineInternal.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "aggressive-instcombine"
+
+/// Given an instruction and a container, it fills all the relevant operands of
+/// that instruction, with respect to the Trunc expression dag optimization.
+static void getRelevantOperands(Instruction *I, SmallVectorImpl<Value *> &Ops) {
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // These CastInst are considered leaves of the evaluated expression, thus,
+ // their operands are not relevant.
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ Ops.push_back(I->getOperand(0));
+ Ops.push_back(I->getOperand(1));
+ break;
+ default:
+ llvm_unreachable("Unreachable!");
+ }
+}
+
+bool TruncInstCombine::buildTruncExpressionDag() {
+ SmallVector<Value *, 8> Worklist;
+ SmallVector<Instruction *, 8> Stack;
+ // Clear old expression dag.
+ InstInfoMap.clear();
+
+ Worklist.push_back(CurrentTruncInst->getOperand(0));
+
+ while (!Worklist.empty()) {
+ Value *Curr = Worklist.back();
+
+ if (isa<Constant>(Curr)) {
+ Worklist.pop_back();
+ continue;
+ }
+
+ auto *I = dyn_cast<Instruction>(Curr);
+ if (!I)
+ return false;
+
+ if (!Stack.empty() && Stack.back() == I) {
+ // Already handled all instruction operands, can remove it from both the
+ // Worklist and the Stack, and add it to the instruction info map.
+ Worklist.pop_back();
+ Stack.pop_back();
+ // Insert I to the Info map.
+ InstInfoMap.insert(std::make_pair(I, Info()));
+ continue;
+ }
+
+ if (InstInfoMap.count(I)) {
+ Worklist.pop_back();
+ continue;
+ }
+
+ // Add the instruction to the stack before start handling its operands.
+ Stack.push_back(I);
+
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // trunc(trunc(x)) -> trunc(x)
+ // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
+ // trunc(ext(x)) -> trunc(x) if the source type is larger than the new
+ // dest
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ SmallVector<Value *, 2> Operands;
+ getRelevantOperands(I, Operands);
+ for (Value *Operand : Operands)
+ Worklist.push_back(Operand);
+ break;
+ }
+ default:
+ // TODO: Can handle more cases here:
+ // 1. select, shufflevector, extractelement, insertelement
+ // 2. udiv, urem
+ // 3. shl, lshr, ashr
+ // 4. phi node(and loop handling)
+ // ...
+ return false;
+ }
+ }
+ return true;
+}
+
+unsigned TruncInstCombine::getMinBitWidth() {
+ SmallVector<Value *, 8> Worklist;
+ SmallVector<Instruction *, 8> Stack;
+
+ Value *Src = CurrentTruncInst->getOperand(0);
+ Type *DstTy = CurrentTruncInst->getType();
+ unsigned TruncBitWidth = DstTy->getScalarSizeInBits();
+ unsigned OrigBitWidth =
+ CurrentTruncInst->getOperand(0)->getType()->getScalarSizeInBits();
+
+ if (isa<Constant>(Src))
+ return TruncBitWidth;
+
+ Worklist.push_back(Src);
+ InstInfoMap[cast<Instruction>(Src)].ValidBitWidth = TruncBitWidth;
+
+ while (!Worklist.empty()) {
+ Value *Curr = Worklist.back();
+
+ if (isa<Constant>(Curr)) {
+ Worklist.pop_back();
+ continue;
+ }
+
+ // Otherwise, it must be an instruction.
+ auto *I = cast<Instruction>(Curr);
+
+ auto &Info = InstInfoMap[I];
+
+ SmallVector<Value *, 2> Operands;
+ getRelevantOperands(I, Operands);
+
+ if (!Stack.empty() && Stack.back() == I) {
+ // Already handled all instruction operands, can remove it from both, the
+ // Worklist and the Stack, and update MinBitWidth.
+ Worklist.pop_back();
+ Stack.pop_back();
+ for (auto *Operand : Operands)
+ if (auto *IOp = dyn_cast<Instruction>(Operand))
+ Info.MinBitWidth =
+ std::max(Info.MinBitWidth, InstInfoMap[IOp].MinBitWidth);
+ continue;
+ }
+
+ // Add the instruction to the stack before start handling its operands.
+ Stack.push_back(I);
+ unsigned ValidBitWidth = Info.ValidBitWidth;
+
+ // Update minimum bit-width before handling its operands. This is required
+ // when the instruction is part of a loop.
+ Info.MinBitWidth = std::max(Info.MinBitWidth, Info.ValidBitWidth);
+
+ for (auto *Operand : Operands)
+ if (auto *IOp = dyn_cast<Instruction>(Operand)) {
+ // If we already calculated the minimum bit-width for this valid
+ // bit-width, or for a smaller valid bit-width, then just keep the
+ // answer we already calculated.
+ unsigned IOpBitwidth = InstInfoMap.lookup(IOp).ValidBitWidth;
+ if (IOpBitwidth >= ValidBitWidth)
+ continue;
+ InstInfoMap[IOp].ValidBitWidth = std::max(ValidBitWidth, IOpBitwidth);
+ Worklist.push_back(IOp);
+ }
+ }
+ unsigned MinBitWidth = InstInfoMap.lookup(cast<Instruction>(Src)).MinBitWidth;
+ assert(MinBitWidth >= TruncBitWidth);
+
+ if (MinBitWidth > TruncBitWidth) {
+ // In this case reducing expression with vector type might generate a new
+ // vector type, which is not preferable as it might result in generating
+ // sub-optimal code.
+ if (DstTy->isVectorTy())
+ return OrigBitWidth;
+ // Use the smallest integer type in the range [MinBitWidth, OrigBitWidth).
+ Type *Ty = DL.getSmallestLegalIntType(DstTy->getContext(), MinBitWidth);
+ // Update minimum bit-width with the new destination type bit-width if
+ // succeeded to find such, otherwise, with original bit-width.
+ MinBitWidth = Ty ? Ty->getScalarSizeInBits() : OrigBitWidth;
+ } else { // MinBitWidth == TruncBitWidth
+ // In this case the expression can be evaluated with the trunc instruction
+ // destination type, and trunc instruction can be omitted. However, we
+ // should not perform the evaluation if the original type is a legal scalar
+ // type and the target type is illegal.
+ bool FromLegal = MinBitWidth == 1 || DL.isLegalInteger(OrigBitWidth);
+ bool ToLegal = MinBitWidth == 1 || DL.isLegalInteger(MinBitWidth);
+ if (!DstTy->isVectorTy() && FromLegal && !ToLegal)
+ return OrigBitWidth;
+ }
+ return MinBitWidth;
+}
+
+Type *TruncInstCombine::getBestTruncatedType() {
+ if (!buildTruncExpressionDag())
+ return nullptr;
+
+ // We don't want to duplicate instructions, which isn't profitable. Thus, we
+ // can't shrink something that has multiple users, unless all users are
+ // post-dominated by the trunc instruction, i.e., were visited during the
+ // expression evaluation.
+ unsigned DesiredBitWidth = 0;
+ for (auto Itr : InstInfoMap) {
+ Instruction *I = Itr.first;
+ if (I->hasOneUse())
+ continue;
+ bool IsExtInst = (isa<ZExtInst>(I) || isa<SExtInst>(I));
+ for (auto *U : I->users())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ if (UI != CurrentTruncInst && !InstInfoMap.count(UI)) {
+ if (!IsExtInst)
+ return nullptr;
+ // If this is an extension from the dest type, we can eliminate it,
+ // even if it has multiple users. Thus, update the DesiredBitWidth and
+ // validate all extension instructions agrees on same DesiredBitWidth.
+ unsigned ExtInstBitWidth =
+ I->getOperand(0)->getType()->getScalarSizeInBits();
+ if (DesiredBitWidth && DesiredBitWidth != ExtInstBitWidth)
+ return nullptr;
+ DesiredBitWidth = ExtInstBitWidth;
+ }
+ }
+
+ unsigned OrigBitWidth =
+ CurrentTruncInst->getOperand(0)->getType()->getScalarSizeInBits();
+
+ // Calculate minimum allowed bit-width allowed for shrinking the currently
+ // visited truncate's operand.
+ unsigned MinBitWidth = getMinBitWidth();
+
+ // Check that we can shrink to a smaller bit-width than the original one and
+ // that it matches the DesiredBitWidth, if such exists.
+ if (MinBitWidth >= OrigBitWidth ||
+ (DesiredBitWidth && DesiredBitWidth != MinBitWidth))
+ return nullptr;
+
+ return IntegerType::get(CurrentTruncInst->getContext(), MinBitWidth);
+}
+
+/// Given a reduced scalar type \p Ty and a \p V value, return a reduced type
+/// for \p V, according to its type; if it is a vector type, return the vector
+/// version of \p Ty, otherwise return \p Ty.
+static Type *getReducedType(Value *V, Type *Ty) {
+ assert(Ty && !Ty->isVectorTy() && "Expect Scalar Type");
+ if (auto *VTy = dyn_cast<VectorType>(V->getType()))
+ return VectorType::get(Ty, VTy->getNumElements());
+ return Ty;
+}
+
+Value *TruncInstCombine::getReducedOperand(Value *V, Type *SclTy) {
+ Type *Ty = getReducedType(V, SclTy);
+ if (auto *C = dyn_cast<Constant>(V)) {
+ C = ConstantExpr::getIntegerCast(C, Ty, false);
+ // If we got a constantexpr back, try to simplify it with DL info.
+ if (Constant *FoldedC = ConstantFoldConstant(C, DL, &TLI))
+ C = FoldedC;
+ return C;
+ }
+
+ auto *I = cast<Instruction>(V);
+ Info Entry = InstInfoMap.lookup(I);
+ assert(Entry.NewValue);
+ return Entry.NewValue;
+}
+
+void TruncInstCombine::ReduceExpressionDag(Type *SclTy) {
+ for (auto &Itr : InstInfoMap) { // Forward
+ Instruction *I = Itr.first;
+
+ assert(!InstInfoMap.lookup(I).NewValue && "Instruction has been evaluated");
+
+ IRBuilder<> Builder(I);
+ Value *Res = nullptr;
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt: {
+ Type *Ty = getReducedType(I, SclTy);
+ // If the source type of the cast is the type we're trying for then we can
+ // just return the source. There's no need to insert it because it is not
+ // new.
+ if (I->getOperand(0)->getType() == Ty) {
+ InstInfoMap[I].NewValue = I->getOperand(0);
+ continue;
+ }
+ // Otherwise, must be the same type of cast, so just reinsert a new one.
+ // This also handles the case of zext(trunc(x)) -> zext(x).
+ Res = Builder.CreateIntCast(I->getOperand(0), Ty,
+ Opc == Instruction::SExt);
+
+ // Update Worklist entries with new value if needed.
+ if (auto *NewCI = dyn_cast<TruncInst>(Res)) {
+ auto Entry = find(Worklist, I);
+ if (Entry != Worklist.end())
+ *Entry = NewCI;
+ }
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ Value *LHS = getReducedOperand(I->getOperand(0), SclTy);
+ Value *RHS = getReducedOperand(I->getOperand(1), SclTy);
+ Res = Builder.CreateBinOp((Instruction::BinaryOps)Opc, LHS, RHS);
+ break;
+ }
+ default:
+ llvm_unreachable("Unhandled instruction");
+ }
+
+ InstInfoMap[I].NewValue = Res;
+ cast<Instruction>(Res)->takeName(I);
+ }
+
+ Value *Res = getReducedOperand(CurrentTruncInst->getOperand(0), SclTy);
+ Type *DstTy = CurrentTruncInst->getType();
+ if (Res->getType() != DstTy) {
+ IRBuilder<> Builder(CurrentTruncInst);
+ Res = Builder.CreateIntCast(Res, DstTy, false);
+ cast<Instruction>(Res)->takeName(CurrentTruncInst);
+ }
+ CurrentTruncInst->replaceAllUsesWith(Res);
+
+ // Erase old expression dag, which was replaced by the reduced expression dag.
+ // We iterate backward, which means we visit the instruction before we visit
+ // any of its operands, this way, when we get to the operand, we already
+ // removed the instructions (from the expression dag) that uses it.
+ CurrentTruncInst->eraseFromParent();
+ for (auto I = InstInfoMap.rbegin(), E = InstInfoMap.rend(); I != E; ++I) {
+ // We still need to check that the instruction has no users before we erase
+ // it, because a {SExt, ZExt}Inst instruction might have other users that were
+ // not reduced; in such a case, we need to keep that instruction.
+ if (!I->first->getNumUses())
+ I->first->eraseFromParent();
+ }
+}
+
+bool TruncInstCombine::run(Function &F) {
+ bool MadeIRChange = false;
+
+ // Collect all TruncInst in the function into the Worklist for evaluating.
+ for (auto &BB : F)
+ for (auto &I : BB)
+ if (auto *CI = dyn_cast<TruncInst>(&I))
+ Worklist.push_back(CI);
+
+ // Process all TruncInst in the Worklist, for each instruction:
+ // 1. Check if it dominates an eligible expression dag to be reduced.
+ // 2. Create a reduced expression dag and replace the old one with it.
+ while (!Worklist.empty()) {
+ CurrentTruncInst = Worklist.pop_back_val();
+
+ if (Type *NewDstSclTy = getBestTruncatedType()) {
+ DEBUG(dbgs() << "ICE: TruncInstCombine reducing type of expression dag "
+ "dominated by: "
+ << CurrentTruncInst << '\n');
+ ReduceExpressionDag(NewDstSclTy);
+ MadeIRChange = true;
+ }
+ }
+
+ return MadeIRChange;
+}
diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt
index 67bdeb27212..74db9e53304 100644
--- a/llvm/lib/Transforms/CMakeLists.txt
+++ b/llvm/lib/Transforms/CMakeLists.txt
@@ -1,5 +1,6 @@
add_subdirectory(Utils)
add_subdirectory(Instrumentation)
+add_subdirectory(AggressiveInstCombine)
add_subdirectory(InstCombine)
add_subdirectory(Scalar)
add_subdirectory(IPO)
diff --git a/llvm/lib/Transforms/IPO/LLVMBuild.txt b/llvm/lib/Transforms/IPO/LLVMBuild.txt
index a8b0f32fd78..54ce23876e6 100644
--- a/llvm/lib/Transforms/IPO/LLVMBuild.txt
+++ b/llvm/lib/Transforms/IPO/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
name = IPO
parent = Transforms
library_name = ipo
-required_libraries = Analysis BitReader BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation
+required_libraries = AggressiveInstCombine Analysis BitReader BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 3855e6245d8..f8dfa4d3fe7 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -318,6 +318,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
// Combine silly seq's
+ if (OptLevel > 2)
+ MPM.add(createAggressiveInstCombinerPass());
addInstructionCombiningPass(MPM);
if (SizeLevel == 0 && !DisableLibCallsShrinkWrap)
MPM.add(createLibCallsShrinkWrapPass());
@@ -765,6 +767,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// simplification opportunities, and both can propagate functions through
// function pointers. When this happens, we often have to resolve varargs
// calls, etc, so let instcombine do this.
+ if (OptLevel > 2)
+ PM.add(createAggressiveInstCombinerPass());
addInstructionCombiningPass(PM);
addExtensionsToPM(EP_Peephole, PM);
diff --git a/llvm/lib/Transforms/LLVMBuild.txt b/llvm/lib/Transforms/LLVMBuild.txt
index 95482ad2022..f061c6d9285 100644
--- a/llvm/lib/Transforms/LLVMBuild.txt
+++ b/llvm/lib/Transforms/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = Coroutines IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC
+subdirectories = AggressiveInstCombine Coroutines IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC
[component_0]
type = Group
diff --git a/llvm/lib/Transforms/Scalar/LLVMBuild.txt b/llvm/lib/Transforms/Scalar/LLVMBuild.txt
index 8a99df86b84..ffe35f041b3 100644
--- a/llvm/lib/Transforms/Scalar/LLVMBuild.txt
+++ b/llvm/lib/Transforms/Scalar/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
name = Scalar
parent = Transforms
library_name = ScalarOpts
-required_libraries = Analysis Core InstCombine Support TransformUtils
+required_libraries = AggressiveInstCombine Analysis Core InstCombine Support TransformUtils
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 2be8fdeb06f..7305df147fa 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -126,6 +126,7 @@
; CHECK-O-NEXT: Running analysis: LazyValueAnalysis
; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-O3-NEXT: AggressiveInstCombinePass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass
; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index 878198d1447..a2d14848117 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -10,7 +10,8 @@
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='lto<O3>' -S %s 2>&1 \
-; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2
+; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \
+; RUN: --check-prefix=CHECK-O3
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='lto<Os>' -S %s 2>&1 \
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2
@@ -20,7 +21,7 @@
; RUN: opt -disable-verify -debug-pass-manager \
; RUN: -passes='lto<O3>' -S %s -passes-ep-peephole='no-op-function' 2>&1 \
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \
-; RUN: --check-prefix=CHECK-EP-Peephole
+; RUN: --check-prefix=CHECK-O3 --check-prefix=CHECK-EP-Peephole
; CHECK-O: Starting llvm::Module pass manager run.
; CHECK-O-NEXT: Running pass: PassManager<{{.*}}Module
@@ -60,6 +61,7 @@
; CHECK-O2-NEXT: Running pass: DeadArgumentEliminationPass
; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
; CHECK-O2-NEXT: Starting llvm::Function pass manager run.
+; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass
; CHECK-O2-NEXT: Running pass: InstCombinePass
; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll
index c40e46aee6e..9f4e4ae8276 100644
--- a/llvm/test/Other/new-pm-thinlto-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -111,6 +111,7 @@
; CHECK-O-NEXT: Running analysis: LazyValueAnalysis
; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass
; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass
diff --git a/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll b/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll
new file mode 100644
index 00000000000..389f77d4c70
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll
@@ -0,0 +1,214 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -aggressive-instcombine -S | FileCheck %s
+; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Aggressive Instcombine should be able to reduce width of these expressions.
+
+declare i32 @use32(i32)
+declare i32 @use64(i64)
+declare <2 x i32> @use32_vec(<2 x i32>)
+declare <2 x i32> @use64_vec(<2 x i64>)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; These tests check cases where expression dag post-dominated by TruncInst
+;; contains instruction, which has more than one usage.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+define void @multi_uses_add(i32 %X) {
+; CHECK-LABEL: @multi_uses_add(
+; CHECK-NEXT: [[A1:%.*]] = zext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[B1:%.*]] = add i32 [[X]], 15
+; CHECK-NEXT: [[C1:%.*]] = mul i32 [[B1]], [[B1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 [[C1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use64(i64 [[A1]])
+; CHECK-NEXT: ret void
+;
+ %A1 = zext i32 %X to i64
+ %B1 = add i64 %A1, 15
+ %C1 = mul i64 %B1, %B1
+ %T1 = trunc i64 %C1 to i32
+ call i32 @use32(i32 %T1)
+  ; make sure the zext has another use that is not post-dominated by the TruncInst.
+ call i32 @use64(i64 %A1)
+ ret void
+}
+
+define void @multi_uses_or(i32 %X) {
+; CHECK-LABEL: @multi_uses_or(
+; CHECK-NEXT: [[A1:%.*]] = zext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[B1:%.*]] = or i32 [[X]], 15
+; CHECK-NEXT: [[C1:%.*]] = mul i32 [[B1]], [[B1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 [[C1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use64(i64 [[A1]])
+; CHECK-NEXT: ret void
+;
+ %A1 = zext i32 %X to i64
+ %B1 = or i64 %A1, 15
+ %C1 = mul i64 %B1, %B1
+ %T1 = trunc i64 %C1 to i32
+ call i32 @use32(i32 %T1)
+  ; make sure the zext has another use that is not post-dominated by the TruncInst.
+ call i32 @use64(i64 %A1)
+ ret void
+}
+
+define void @multi_uses_xor(i32 %X) {
+; CHECK-LABEL: @multi_uses_xor(
+; CHECK-NEXT: [[A1:%.*]] = zext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[B1:%.*]] = xor i32 [[X]], 15
+; CHECK-NEXT: [[C1:%.*]] = mul i32 [[B1]], [[B1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 [[C1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use64(i64 [[A1]])
+; CHECK-NEXT: ret void
+;
+ %A1 = zext i32 %X to i64
+ %B1 = xor i64 %A1, 15
+ %C1 = mul i64 %B1, %B1
+ %T1 = trunc i64 %C1 to i32
+ call i32 @use32(i32 %T1)
+  ; make sure the zext has another use that is not post-dominated by the TruncInst.
+ call i32 @use64(i64 %A1)
+ ret void
+}
+
+define void @multi_uses_and(i32 %X) {
+; CHECK-LABEL: @multi_uses_and(
+; CHECK-NEXT: [[A1:%.*]] = zext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[B1:%.*]] = and i32 [[X]], 15
+; CHECK-NEXT: [[C1:%.*]] = mul i32 [[B1]], [[B1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 [[C1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use64(i64 [[A1]])
+; CHECK-NEXT: ret void
+;
+ %A1 = zext i32 %X to i64
+ %B1 = and i64 %A1, 15
+ %C1 = mul i64 %B1, %B1
+ %T1 = trunc i64 %C1 to i32
+ call i32 @use32(i32 %T1)
+  ; make sure the zext has another use that is not post-dominated by the TruncInst.
+ call i32 @use64(i64 %A1)
+ ret void
+}
+
+define void @multi_uses_sub(i32 %X, i32 %Y) {
+; CHECK-LABEL: @multi_uses_sub(
+; CHECK-NEXT: [[A1:%.*]] = zext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[A2:%.*]] = zext i32 [[Y:%.*]] to i64
+; CHECK-NEXT: [[B1:%.*]] = sub i32 [[X]], [[Y]]
+; CHECK-NEXT: [[C1:%.*]] = mul i32 [[B1]], [[B1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use32(i32 [[C1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use64(i64 [[A1]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use64(i64 [[A2]])
+; CHECK-NEXT: ret void
+;
+ %A1 = zext i32 %X to i64
+ %A2 = zext i32 %Y to i64
+ %B1 = sub i64 %A1, %A2
+ %C1 = mul i64 %B1, %B1
+ %T1 = trunc i64 %C1 to i32
+ call i32 @use32(i32 %T1)
+  ; make sure each zext has another use that is not post-dominated by the TruncInst.
+ call i32 @use64(i64 %A1)
+ call i32 @use64(i64 %A2)
+ ret void
+}
+
+define void @multi_use_vec_add(<2 x i32> %X) {
+; CHECK-LABEL: @multi_use_vec_add(
+; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
+; CHECK-NEXT: [[B1:%.*]] = add <2 x i32> [[X]], <i32 15, i32 15>
+; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]])
+; CHECK-NEXT: ret void
+;
+ %A1 = zext <2 x i32> %X to <2 x i64>
+ %B1 = add <2 x i64> %A1, <i64 15, i64 15>
+ %C1 = mul <2 x i64> %B1, %B1
+ %T1 = trunc <2 x i64> %C1 to <2 x i32>
+ call <2 x i32> @use32_vec(<2 x i32> %T1)
+  ; make sure the zext has another use that is not post-dominated by the TruncInst.
+ call <2 x i32> @use64_vec(<2 x i64> %A1)
+ ret void
+}
+
+define void @multi_use_vec_or(<2 x i32> %X) {
+; CHECK-LABEL: @multi_use_vec_or(
+; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
+; CHECK-NEXT: [[B1:%.*]] = or <2 x i32> [[X]], <i32 15, i32 15>
+; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]])
+; CHECK-NEXT: ret void
+;
+ %A1 = zext <2 x i32> %X to <2 x i64>
+ %B1 = or <2 x i64> %A1, <i64 15, i64 15>
+ %C1 = mul <2 x i64> %B1, %B1
+ %T1 = trunc <2 x i64> %C1 to <2 x i32>
+ call <2 x i32> @use32_vec(<2 x i32> %T1)
+  ; make sure the zext has another use that is not post-dominated by the TruncInst.
+ call <2 x i32> @use64_vec(<2 x i64> %A1)
+ ret void
+}
+
+define void @multi_use_vec_xor(<2 x i32> %X) {
+; CHECK-LABEL: @multi_use_vec_xor(
+; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
+; CHECK-NEXT: [[B1:%.*]] = xor <2 x i32> [[X]], <i32 15, i32 15>
+; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]])
+; CHECK-NEXT: ret void
+;
+ %A1 = zext <2 x i32> %X to <2 x i64>
+ %B1 = xor <2 x i64> %A1, <i64 15, i64 15>
+ %C1 = mul <2 x i64> %B1, %B1
+ %T1 = trunc <2 x i64> %C1 to <2 x i32>
+ call <2 x i32> @use32_vec(<2 x i32> %T1)
+  ; make sure the zext has another use that is not post-dominated by the TruncInst.
+ call <2 x i32> @use64_vec(<2 x i64> %A1)
+ ret void
+}
+
+define void @multi_use_vec_and(<2 x i32> %X) {
+; CHECK-LABEL: @multi_use_vec_and(
+; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
+; CHECK-NEXT: [[B1:%.*]] = and <2 x i32> [[X]], <i32 15, i32 15>
+; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]])
+; CHECK-NEXT: ret void
+;
+ %A1 = zext <2 x i32> %X to <2 x i64>
+ %B1 = and <2 x i64> %A1, <i64 15, i64 15>
+ %C1 = mul <2 x i64> %B1, %B1
+ %T1 = trunc <2 x i64> %C1 to <2 x i32>
+ call <2 x i32> @use32_vec(<2 x i32> %T1)
+  ; make sure the zext has another use that is not post-dominated by the TruncInst.
+ call <2 x i32> @use64_vec(<2 x i64> %A1)
+ ret void
+}
+
+define void @multi_use_vec_sub(<2 x i32> %X, <2 x i32> %Y) {
+; CHECK-LABEL: @multi_use_vec_sub(
+; CHECK-NEXT: [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
+; CHECK-NEXT: [[A2:%.*]] = zext <2 x i32> [[Y:%.*]] to <2 x i64>
+; CHECK-NEXT: [[B1:%.*]] = sub <2 x i32> [[X]], [[Y]]
+; CHECK-NEXT: [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]])
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A2]])
+; CHECK-NEXT: ret void
+;
+ %A1 = zext <2 x i32> %X to <2 x i64>
+ %A2 = zext <2 x i32> %Y to <2 x i64>
+ %B1 = sub <2 x i64> %A1, %A2
+ %C1 = mul <2 x i64> %B1, %B1
+ %T1 = trunc <2 x i64> %C1 to <2 x i32>
+ call <2 x i32> @use32_vec(<2 x i32> %T1)
+  ; make sure each zext has another use that is not post-dominated by the TruncInst.
+ call <2 x i32> @use64_vec(<2 x i64> %A1)
+ call <2 x i32> @use64_vec(<2 x i64> %A2)
+ ret void
+}
diff --git a/llvm/tools/opt/CMakeLists.txt b/llvm/tools/opt/CMakeLists.txt
index dedc25143cf..f03d1151665 100644
--- a/llvm/tools/opt/CMakeLists.txt
+++ b/llvm/tools/opt/CMakeLists.txt
@@ -1,5 +1,6 @@
set(LLVM_LINK_COMPONENTS
${LLVM_TARGETS_TO_BUILD}
+ AggressiveInstCombine
Analysis
BitWriter
CodeGen
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index c1c84844a4a..e03f6cee39f 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -395,6 +395,7 @@ int main(int argc, char **argv) {
initializeAnalysis(Registry);
initializeTransformUtils(Registry);
initializeInstCombine(Registry);
+ initializeAggressiveInstCombinerLegacyPassPass(Registry);
initializeInstrumentation(Registry);
initializeTarget(Registry);
// For codegen passes, only passes that do IR to IR transformation are
OpenPOWER on IntegriCloud