From 37e7f959c0a381a0eddaed465426bb3605a1ef44 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 1 Aug 2017 23:14:32 +0000 Subject: [AMDGPU] Collapse adjacent SI_END_CF Add a pass to remove redundant S_OR_B64 instructions that enable lanes in the exec mask. If two SI_END_CF (lowered as S_OR_B64) come together without any vector instructions between them, we can keep only the outer SI_END_CF, given that the CFG is structured and the exec bits of the outer end statement always include those of the inner one. This needs to be done before register allocation, to eliminate the registers holding saved exec bits, but after the register coalescer, so that there are no vector register copies between different end cf statements. Differential Revision: https://reviews.llvm.org/D35967 llvm-svn: 309762 --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp') diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 571ee97ff36..714aebbafae 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -142,6 +142,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() { initializeSIPeepholeSDWAPass(*PR); initializeSIShrinkInstructionsPass(*PR); initializeSIFixControlFlowLiveIntervalsPass(*PR); + initializeSIOptimizeExecMaskingPreRAPass(*PR); initializeSILoadStoreOptimizerPass(*PR); initializeAMDGPUAlwaysInlinePass(*PR); initializeAMDGPUAnnotateKernelFeaturesPass(*PR); @@ -781,6 +782,9 @@ void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { } void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + if (getOptLevel() > CodeGenOpt::None) + insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID); + // This needs to be run directly before register allocation because earlier // passes might recompute live intervals. 
insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID); -- cgit v1.2.3