diff options
| author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2017-01-24 17:46:17 +0000 |
|---|---|---|
| committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2017-01-24 17:46:17 +0000 |
| commit | 22a56f2f5a1fd0c248260b597f85586973294523 (patch) | |
| tree | 60ad42a8348672186089e0bc98b79e031e6b0d7c /llvm | |
| parent | 310c3d3d26b81a816119e2e3d3fc9d03a75ee075 (diff) | |
| download | bcm5719-llvm-22a56f2f5a1fd0c248260b597f85586973294523.tar.gz bcm5719-llvm-22a56f2f5a1fd0c248260b597f85586973294523.zip | |
[AMDGPU] Add VGPR copies post regalloc fix pass
Regalloc creates COPY instructions which do not formally use VALU.
That results in v_mov instructions being displaced after exec mask modification.
One pass which does this is SIOptimizeExecMasking, but potentially it can be
done by other passes too.
This patch adds a pass immediately after regalloc to add an implicit exec
use operand to all VGPR copy instructions.
Differential Revision: https://reviews.llvm.org/D28874
llvm-svn: 292956
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp | 72 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir | 44 |
5 files changed, 122 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 7b0a7f4b605..78876ab4f9e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -60,6 +60,9 @@ extern char &SIShrinkInstructionsID; void initializeSIFixSGPRCopiesPass(PassRegistry &); extern char &SIFixSGPRCopiesID; +void initializeSIFixVGPRCopiesPass(PassRegistry &); +extern char &SIFixVGPRCopiesID; + void initializeSILowerI1CopiesPass(PassRegistry &); extern char &SILowerI1CopiesID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index d8a0c716279..06adb372305 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -86,6 +86,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() { PassRegistry *PR = PassRegistry::getPassRegistry(); initializeSILowerI1CopiesPass(*PR); initializeSIFixSGPRCopiesPass(*PR); + initializeSIFixVGPRCopiesPass(*PR); initializeSIFoldOperandsPass(*PR); initializeSIShrinkInstructionsPass(*PR); initializeSIFixControlFlowLiveIntervalsPass(*PR); @@ -615,6 +616,7 @@ void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { } void GCNPassConfig::addPostRegAlloc() { + addPass(&SIFixVGPRCopiesID); addPass(&SIOptimizeExecMaskingID); TargetPassConfig::addPostRegAlloc(); } diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index 02d441756c8..bbd06b19d63 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -68,6 +68,7 @@ add_llvm_target(AMDGPUCodeGen SIDebuggerInsertNops.cpp SIFixControlFlowLiveIntervals.cpp SIFixSGPRCopies.cpp + SIFixVGPRCopies.cpp SIFoldOperands.cpp SIFrameLowering.cpp SIInsertSkips.cpp diff --git a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp new file mode 100644 index 00000000000..3d3121788b5 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp @@ -0,0 
+1,72 @@ +//===-- SIFixVGPRCopies.cpp - Fix VGPR Copies after regalloc --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Add implicit use of exec to vector register copies. +/// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +using namespace llvm; + +#define DEBUG_TYPE "si-fix-vgpr-copies" + +namespace { + +class SIFixVGPRCopies : public MachineFunctionPass { +public: + static char ID; + +public: + SIFixVGPRCopies() : MachineFunctionPass(ID) { + initializeSIFixVGPRCopiesPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { return "SI Fix VGPR copies"; } +}; + +} // End anonymous namespace. 
+ +INITIALIZE_PASS(SIFixVGPRCopies, DEBUG_TYPE, "SI Fix VGPR copies", false, false) + +char SIFixVGPRCopies::ID = 0; + +char &llvm::SIFixVGPRCopiesID = SIFixVGPRCopies::ID; + +bool SIFixVGPRCopies::runOnMachineFunction(MachineFunction &MF) { + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); + bool Changed = false; + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + switch (MI.getOpcode()) { + case AMDGPU::COPY: + if (TII->isVGPRCopy(MI) && !MI.readsRegister(AMDGPU::EXEC, TRI)) { + MI.addOperand(MF, + MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); + DEBUG(dbgs() << "Add exec use to " << MI); + Changed = true; + } + break; + default: + break; + } + } + } + + return Changed; +} diff --git a/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir new file mode 100644 index 00000000000..4951e0df4d3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir @@ -0,0 +1,44 @@ +# RUN: llc -march=amdgcn -start-after=greedy -stop-after=si-optimize-exec-masking -o - %s | FileCheck %s +# Check that we first do all vector instructions and only then change exec +# CHECK-DAG: COPY %vgpr10_vgpr11 +# CHECK-DAG: COPY %vgpr12_vgpr13 +# CHECK: %exec = COPY + +--- +name: main +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%sgpr4_sgpr5' } + - { reg: '%sgpr6' } + - { reg: '%vgpr0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %vgpr3, %vgpr10_vgpr11, %vgpr12_vgpr13 + + %vcc = V_CMP_NE_U32_e64 0, killed %vgpr3, 
implicit %exec + %sgpr4_sgpr5 = COPY %exec, implicit-def %exec + %sgpr6_sgpr7 = S_AND_B64 %sgpr4_sgpr5, killed %vcc, implicit-def dead %scc + %sgpr4_sgpr5 = S_XOR_B64 %sgpr6_sgpr7, killed %sgpr4_sgpr5, implicit-def dead %scc + %vgpr61_vgpr62 = COPY %vgpr10_vgpr11 + %vgpr155_vgpr156 = COPY %vgpr12_vgpr13 + %exec = S_MOV_B64_term killed %sgpr6_sgpr7 +... |

