summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp44
1 files changed, 30 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 54a28fde83f..162bbc2f91c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -45,6 +45,18 @@ static cl::opt<bool> EnableR600StructurizeCFG(
cl::desc("Use StructurizeCFG IR pass"),
cl::init(true));
+static cl::opt<bool> EnableSROA(
+ "amdgpu-sroa",
+ cl::desc("Run SROA after promote alloca pass"),
+ cl::ReallyHidden,
+ cl::init(true));
+
+static cl::opt<bool> EnableR600IfConvert(
+ "r600-if-convert",
+ cl::desc("Use if conversion pass"),
+ cl::ReallyHidden,
+ cl::init(true));
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
@@ -212,12 +224,7 @@ public:
}
ScheduleDAGInstrs *
- createMachineScheduler(MachineSchedContext *C) const override {
- const SISubtarget *ST = getGCNTargetMachine().getSubtargetImpl();
- if (ST->enableSIScheduler())
- return createSIMachineScheduler(C);
- return nullptr;
- }
+ createMachineScheduler(MachineSchedContext *C) const override;
bool addPreISel() override;
void addMachineSSAOptimization() override;
@@ -285,10 +292,11 @@ void AMDGPUPassConfig::addIRPasses() {
addPass(createAMDGPUOpenCLImageTypeLoweringPass());
const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
- const AMDGPUSubtarget &ST = *TM.getSubtargetImpl();
- if (TM.getOptLevel() > CodeGenOpt::None && ST.isPromoteAllocaEnabled()) {
+ if (TM.getOptLevel() > CodeGenOpt::None) {
addPass(createAMDGPUPromoteAlloca(&TM));
- addPass(createSROAPass());
+
+ if (EnableSROA)
+ addPass(createSROAPass());
}
addStraightLineScalarOptimizationPasses();
@@ -344,9 +352,8 @@ void R600PassConfig::addPreRegAlloc() {
}
void R600PassConfig::addPreSched2() {
- const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
addPass(createR600EmitClauseMarkers(), false);
- if (ST.isIfCvtEnabled())
+ if (EnableR600IfConvert)
addPass(&IfConverterID, false);
addPass(createR600ClauseMergePass(*TM), false);
}
@@ -367,6 +374,14 @@ TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
// GCN Pass Setup
//===----------------------------------------------------------------------===//
+ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
+ MachineSchedContext *C) const {
+ const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
+ if (ST.enableSIScheduler())
+ return createSIMachineScheduler(C);
+ return nullptr;
+}
+
bool GCNPassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel();
@@ -415,8 +430,6 @@ bool GCNPassConfig::addRegBankSelect() {
#endif
void GCNPassConfig::addPreRegAlloc() {
- const SISubtarget &ST = *getGCNTargetMachine().getSubtargetImpl();
-
// This needs to be run directly before register allocation because
// earlier passes might recompute live intervals.
// TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
@@ -424,15 +437,18 @@ void GCNPassConfig::addPreRegAlloc() {
insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
}
- if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
+ if (getOptLevel() > CodeGenOpt::None) {
// Don't do this with no optimizations since it throws away debug info by
// merging nonadjacent loads.
// This should be run after scheduling, but before register allocation. It
// also need extra copies to the address operand to be eliminated.
+
+ // FIXME: Move pre-RA and remove extra reg coalescer run.
insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
insertPass(&MachineSchedulerID, &RegisterCoalescerID);
}
+
addPass(createSIShrinkInstructionsPass());
addPass(createSIWholeQuadModePass());
}
OpenPOWER on IntegriCloud