summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
diff options
context:
space:
mode:
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2018-05-31 20:13:51 +0000
committerStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2018-05-31 20:13:51 +0000
commit739174c4be36d13a22bad1187e444f4a804db407 (patch)
tree07ca122e021d1251fe1d3409d014eae96f25bcf1 /llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
parenta6922415b163a46a860c93d4d4e042320363e7c7 (diff)
downloadbcm5719-llvm-739174c4be36d13a22bad1187e444f4a804db407.tar.gz
bcm5719-llvm-739174c4be36d13a22bad1187e444f4a804db407.zip
[AMDGPU] Construct memory clauses before RA
Memory clauses are formed into bundles in presence of xnack. Their source operands are marked as early-clobber. This allows to allocate distinct source and destination registers within a clause and prevent breaking the clause with s_nop in the hazard recognizer. Clauses are undone before post-RA scheduler to allow some rescheduling, which will not break the clause since artificial edges are created in the dag to keep memory operations together. Yet this allows a better ILP in some cases. Differential Revision: https://reviews.llvm.org/D47511 llvm-svn: 333691
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp3
1 files changed, 3 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 74a1de03dda..d361f51bca1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -171,6 +171,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeSIDebuggerInsertNopsPass(*PR);
initializeSIOptimizeExecMaskingPass(*PR);
initializeSIFixWWMLivenessPass(*PR);
+ initializeSIFormMemoryClausesPass(*PR);
initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
initializeAMDGPUAAWrapperPassPass(*PR);
initializeAMDGPUUseNativeCallsPass(*PR);
@@ -847,6 +848,8 @@ void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
+ insertPass(&SIOptimizeExecMaskingPreRAID, &SIFormMemoryClausesID);
+
// This must be run immediately after phi elimination and before
// TwoAddressInstructions, otherwise the processing of the tied operand of
// SI_ELSE will introduce a copy of the tied operand source after the else.
OpenPOWER on IntegriCloud