diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-06-27 20:32:13 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-06-27 20:32:13 +0000 |
commit | 03d8584590c647add646df7557b2e448633ce77c (patch) | |
tree | f3b960436a3e4bc947e267a97ca8644a6d098a4a /llvm/lib/Target | |
parent | 5cdf699daafe87163242a1cc9b4109fd3cb576ff (diff) | |
download | bcm5719-llvm-03d8584590c647add646df7557b2e448633ce77c.tar.gz bcm5719-llvm-03d8584590c647add646df7557b2e448633ce77c.zip |
AMDGPU: Move subtarget feature checks into passes
llvm-svn: 273937
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 44 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 3 |
6 files changed, 37 insertions, 28 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 7736fd6c4cf..607e8d9bfdd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -241,12 +241,6 @@ def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature < "Force using DS instruction immediate offsets on SI" >; -def FeatureIfCvt : SubtargetFeature <"disable-ifcvt", - "EnableIfCvt", - "false", - "Disable the if conversion pass" ->; - def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", "EnableSIScheduler", "true", diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 17b45fa65f1..fa8709e4f2b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -124,6 +124,10 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { if (!TM || skipFunction(F)) return false; + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F); + if (!ST.isPromoteAllocaEnabled()) + return false; + FunctionType *FTy = F.getFunctionType(); // If the function has any arguments in the local address space, then it's @@ -139,8 +143,6 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { } } - const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F); - LocalMemLimit = ST.getLocalMemorySize(); if (LocalMemLimit == 0) return false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index e973f8e4837..39032b682e1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -105,7 +105,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, EnableVGPRSpilling(false), EnablePromoteAlloca(false), - EnableIfCvt(true), EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false), EnableSIScheduler(false), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 53117e3cb60..9a0adf1b166 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -82,7 +82,6 @@ protected: // Used as options. bool EnableVGPRSpilling; bool EnablePromoteAlloca; - bool EnableIfCvt; bool EnableLoadStoreOpt; bool EnableUnsafeDSOffsetFolding; bool EnableSIScheduler; @@ -222,10 +221,6 @@ public: return EnablePromoteAlloca; } - bool isIfCvtEnabled() const { - return EnableIfCvt; - } - bool unsafeDSOffsetFoldingEnabled() const { return EnableUnsafeDSOffsetFolding; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 54a28fde83f..162bbc2f91c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -45,6 +45,18 @@ static cl::opt<bool> EnableR600StructurizeCFG( cl::desc("Use StructurizeCFG IR pass"), cl::init(true)); +static cl::opt<bool> EnableSROA( + "amdgpu-sroa", + cl::desc("Run SROA after promote alloca pass"), + cl::ReallyHidden, + cl::init(true)); + +static cl::opt<bool> EnableR600IfConvert( + "r600-if-convert", + cl::desc("Use if conversion pass"), + cl::ReallyHidden, + cl::init(true)); + extern "C" void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget); @@ -212,12 +224,7 @@ public: } ScheduleDAGInstrs * - createMachineScheduler(MachineSchedContext *C) const override { - const SISubtarget *ST = getGCNTargetMachine().getSubtargetImpl(); - if (ST->enableSIScheduler()) - return createSIMachineScheduler(C); - return nullptr; - } + createMachineScheduler(MachineSchedContext *C) const override; bool addPreISel() override; void addMachineSSAOptimization() override; @@ -285,10 +292,11 @@ void AMDGPUPassConfig::addIRPasses() { addPass(createAMDGPUOpenCLImageTypeLoweringPass()); const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine(); - const AMDGPUSubtarget &ST = *TM.getSubtargetImpl(); - if (TM.getOptLevel() > CodeGenOpt::None && ST.isPromoteAllocaEnabled()) { + if (TM.getOptLevel() > CodeGenOpt::None) { addPass(createAMDGPUPromoteAlloca(&TM)); - addPass(createSROAPass()); + + if (EnableSROA) + addPass(createSROAPass()); } addStraightLineScalarOptimizationPasses(); @@ -344,9 +352,8 @@ void R600PassConfig::addPreRegAlloc() { } void R600PassConfig::addPreSched2() { - const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); addPass(createR600EmitClauseMarkers(), false); - if (ST.isIfCvtEnabled()) + if (EnableR600IfConvert) addPass(&IfConverterID, false); addPass(createR600ClauseMergePass(*TM), false); } @@ -367,6 +374,14 @@ TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { // GCN Pass Setup //===----------------------------------------------------------------------===// +ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler( + MachineSchedContext *C) const { + const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>(); + if (ST.enableSIScheduler()) + return createSIMachineScheduler(C); + return nullptr; +} + bool GCNPassConfig::addPreISel() { AMDGPUPassConfig::addPreISel(); @@ -415,8 +430,6 @@ bool GCNPassConfig::addRegBankSelect() { #endif void GCNPassConfig::addPreRegAlloc() { - const SISubtarget &ST = *getGCNTargetMachine().getSubtargetImpl(); - // This needs to be run directly before register allocation because // earlier passes might recompute live intervals. // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass @@ -424,15 +437,18 @@ void GCNPassConfig::addPreRegAlloc() { insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID); } - if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) { + if (getOptLevel() > CodeGenOpt::None) { // Don't do this with no optimizations since it throws away debug info by // merging nonadjacent loads. // This should be run after scheduling, but before register allocation. It // also need extra copies to the address operand to be eliminated. + + // FIXME: Move pre-RA and remove extra reg coalescer run. insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); insertPass(&MachineSchedulerID, &RegisterCoalescerID); } + addPass(createSIShrinkInstructionsPass()); addPass(createSIWholeQuadModePass()); } diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 0b9b29a54b5..9e972a569a0 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -412,6 +412,9 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) { return false; const SISubtarget &STM = MF.getSubtarget<SISubtarget>(); + if (!STM.loadStoreOptEnabled()) + return false; + TII = STM.getInstrInfo(); TRI = &TII->getRegisterInfo(); |