summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/include/llvm/LinkAllPasses.h | 1
-rw-r--r-- llvm/include/llvm/Transforms/Scalar.h | 4
-rw-r--r-- llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp | 48
-rw-r--r-- llvm/test/Transforms/SpeculativeExecution/divergent-target.ll | 22
4 files changed, 70 insertions, 5 deletions
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 158b2d0576b..568d8c4c235 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -186,6 +186,7 @@ namespace {
(void) llvm::createScalarizerPass();
(void) llvm::createSeparateConstOffsetFromGEPPass();
(void) llvm::createSpeculativeExecutionPass();
+ (void) llvm::createSpeculativeExecutionIfHasBranchDivergencePass();
(void) llvm::createRewriteSymbolsPass();
(void) llvm::createStraightLineStrengthReducePass();
(void) llvm::createMemDerefPrinter();
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 774d8261bca..7ed88fbf43f 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -430,6 +430,10 @@ createSeparateConstOffsetFromGEPPass(const TargetMachine *TM = nullptr,
//
FunctionPass *createSpeculativeExecutionPass();
+// Same as createSpeculativeExecutionPass, but does nothing unless
+// TargetTransformInfo::hasBranchDivergence() is true.
+FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass();
+
//===----------------------------------------------------------------------===//
//
// LoadCombine - Combine loads into bigger loads.
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index 147d615488f..6b82943abf3 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -50,6 +50,15 @@
// aggressive speculation while counting on later passes to either capitalize on
// that or clean it up.
//
+// If the pass was created by calling
+// createSpeculativeExecutionIfHasBranchDivergencePass or the
+// -spec-exec-only-if-divergent-target option is present, this pass only has an
+// effect on targets where TargetTransformInfo::hasBranchDivergence() is true;
+// on other targets, it is a nop.
+//
+// This lets you include this pass unconditionally in the IR pass pipeline, but
+// only enable it for relevant targets.
+//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallSet.h"
@@ -83,19 +92,39 @@ static cl::opt<unsigned> SpecExecMaxNotHoisted(
"number of instructions that would not be speculatively executed "
"exceeds this limit."));
+static cl::opt<bool> SpecExecOnlyIfDivergentTarget(
+ "spec-exec-only-if-divergent-target", cl::init(0), cl::Hidden,
+ cl::desc("Speculative execution is applied only to targets with divergent "
+ "branches, even if the pass was configured to apply only to all "
+ "targets."));
+
namespace {
+
class SpeculativeExecution : public FunctionPass {
public:
- static char ID;
- SpeculativeExecution(): FunctionPass(ID) {}
+ static char ID;
+ explicit SpeculativeExecution(bool OnlyIfDivergentTarget = false)
+ : FunctionPass(ID),
+ OnlyIfDivergentTarget(OnlyIfDivergentTarget ||
+ SpecExecOnlyIfDivergentTarget) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
+ const char *getPassName() const override {
+ if (OnlyIfDivergentTarget)
+ return "Speculatively execute instructions if target has divergent "
+ "branches";
+ return "Speculatively execute instructions";
+ }
private:
bool runOnBasicBlock(BasicBlock &B);
bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock);
+ // If true, this pass is a nop unless the target architecture has branch
+ // divergence.
+ const bool OnlyIfDivergentTarget;
const TargetTransformInfo *TTI = nullptr;
};
} // namespace
@@ -105,7 +134,7 @@ INITIALIZE_PASS_BEGIN(SpeculativeExecution, "speculative-execution",
"Speculatively execute instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(SpeculativeExecution, "speculative-execution",
- "Speculatively execute instructions", false, false)
+ "Speculatively execute instructions", false, false)
void SpeculativeExecution::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfoWrapperPass>();
@@ -116,6 +145,11 @@ bool SpeculativeExecution::runOnFunction(Function &F) {
return false;
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence()) {
+ DEBUG(dbgs() << "Not running SpeculativeExecution because "
+ "TTI->hasBranchDivergence() is false.\n");
+ return false;
+ }
bool Changed = false;
for (auto& B : F) {
@@ -240,4 +274,8 @@ FunctionPass *createSpeculativeExecutionPass() {
return new SpeculativeExecution();
}
+FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() {
+ return new SpeculativeExecution(/* OnlyIfDivergentTarget = */ true);
+}
+
} // namespace llvm
diff --git a/llvm/test/Transforms/SpeculativeExecution/divergent-target.ll b/llvm/test/Transforms/SpeculativeExecution/divergent-target.ll
new file mode 100644
index 00000000000..d3f2a3fa0d3
--- /dev/null
+++ b/llvm/test/Transforms/SpeculativeExecution/divergent-target.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -speculative-execution | \
+; RUN: FileCheck --check-prefix=ON %s
+; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -speculative-execution \
+; RUN: -spec-exec-only-if-divergent-target | \
+; RUN: FileCheck --check-prefix=ON %s
+; RUN: opt < %s -S -march=x86_64 -speculative-execution \
+; RUN: -spec-exec-only-if-divergent-target | \
+; RUN: FileCheck --check-prefix=OFF %s
+
+; Hoist in if-then pattern.
+define void @f() {
+; ON: %x = add i32 2, 3
+; ON: br i1 true
+; OFF: br i1 true
+; OFF: %x = add i32 2, 3
+ br i1 true, label %a, label %b
+a:
+ %x = add i32 2, 3
+ br label %b
+b:
+ ret void
+}
OpenPOWER on IntegriCloud