diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 44 |
1 files changed, 30 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 54a28fde83f..162bbc2f91c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -45,6 +45,18 @@ static cl::opt<bool> EnableR600StructurizeCFG( cl::desc("Use StructurizeCFG IR pass"), cl::init(true)); +static cl::opt<bool> EnableSROA( + "amdgpu-sroa", + cl::desc("Run SROA after promote alloca pass"), + cl::ReallyHidden, + cl::init(true)); + +static cl::opt<bool> EnableR600IfConvert( + "r600-if-convert", + cl::desc("Use if conversion pass"), + cl::ReallyHidden, + cl::init(true)); + extern "C" void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget); @@ -212,12 +224,7 @@ public: } ScheduleDAGInstrs * - createMachineScheduler(MachineSchedContext *C) const override { - const SISubtarget *ST = getGCNTargetMachine().getSubtargetImpl(); - if (ST->enableSIScheduler()) - return createSIMachineScheduler(C); - return nullptr; - } + createMachineScheduler(MachineSchedContext *C) const override; bool addPreISel() override; void addMachineSSAOptimization() override; @@ -285,10 +292,11 @@ void AMDGPUPassConfig::addIRPasses() { addPass(createAMDGPUOpenCLImageTypeLoweringPass()); const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine(); - const AMDGPUSubtarget &ST = *TM.getSubtargetImpl(); - if (TM.getOptLevel() > CodeGenOpt::None && ST.isPromoteAllocaEnabled()) { + if (TM.getOptLevel() > CodeGenOpt::None) { addPass(createAMDGPUPromoteAlloca(&TM)); - addPass(createSROAPass()); + + if (EnableSROA) + addPass(createSROAPass()); } addStraightLineScalarOptimizationPasses(); @@ -344,9 +352,8 @@ void R600PassConfig::addPreRegAlloc() { } void R600PassConfig::addPreSched2() { - const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); addPass(createR600EmitClauseMarkers(), false); - if (ST.isIfCvtEnabled()) + if (EnableR600IfConvert) addPass(&IfConverterID, false); addPass(createR600ClauseMergePass(*TM), false); } @@ -367,6 +374,14 @@ TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { // GCN Pass Setup //===----------------------------------------------------------------------===// +ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler( + MachineSchedContext *C) const { + const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>(); + if (ST.enableSIScheduler()) + return createSIMachineScheduler(C); + return nullptr; +} + bool GCNPassConfig::addPreISel() { AMDGPUPassConfig::addPreISel(); @@ -415,8 +430,6 @@ bool GCNPassConfig::addRegBankSelect() { #endif void GCNPassConfig::addPreRegAlloc() { - const SISubtarget &ST = *getGCNTargetMachine().getSubtargetImpl(); - // This needs to be run directly before register allocation because // earlier passes might recompute live intervals. // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass @@ -424,15 +437,18 @@ void GCNPassConfig::addPreRegAlloc() { insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID); } - if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) { + if (getOptLevel() > CodeGenOpt::None) { // Don't do this with no optimizations since it throws away debug info by // merging nonadjacent loads. // This should be run after scheduling, but before register allocation. It // also need extra copies to the address operand to be eliminated. + + // FIXME: Move pre-RA and remove extra reg coalescer run. insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); insertPass(&MachineSchedulerID, &RegisterCoalescerID); } + addPass(createSIShrinkInstructionsPass()); addPass(createSIWholeQuadModePass()); } |