summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2017-01-24 17:46:17 +0000
committerStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2017-01-24 17:46:17 +0000
commit22a56f2f5a1fd0c248260b597f85586973294523 (patch)
tree60ad42a8348672186089e0bc98b79e031e6b0d7c
parent310c3d3d26b81a816119e2e3d3fc9d03a75ee075 (diff)
downloadbcm5719-llvm-22a56f2f5a1fd0c248260b597f85586973294523.tar.gz
bcm5719-llvm-22a56f2f5a1fd0c248260b597f85586973294523.zip
[AMDGPU] Add VGPR copies post regalloc fix pass
Regalloc creates COPY instructions which do not formally use VALU. That results in v_mov instructions displaced after exec mask modification. One pass which do it is SIOptimizeExecMasking, but potentially it can be done by other passes too. This patch adds a pass immediately after regalloc to add implicit exec use operand to all VGPR copy instructions. Differential Revision: https://reviews.llvm.org/D28874 llvm-svn: 292956
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.h3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/CMakeLists.txt1
-rw-r--r--llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp72
-rw-r--r--llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir44
5 files changed, 122 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 7b0a7f4b605..78876ab4f9e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -60,6 +60,9 @@ extern char &SIShrinkInstructionsID;
void initializeSIFixSGPRCopiesPass(PassRegistry &);
extern char &SIFixSGPRCopiesID;
+void initializeSIFixVGPRCopiesPass(PassRegistry &);
+extern char &SIFixVGPRCopiesID;
+
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index d8a0c716279..06adb372305 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -86,6 +86,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
PassRegistry *PR = PassRegistry::getPassRegistry();
initializeSILowerI1CopiesPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
+ initializeSIFixVGPRCopiesPass(*PR);
initializeSIFoldOperandsPass(*PR);
initializeSIShrinkInstructionsPass(*PR);
initializeSIFixControlFlowLiveIntervalsPass(*PR);
@@ -615,6 +616,7 @@ void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
}
void GCNPassConfig::addPostRegAlloc() {
+ addPass(&SIFixVGPRCopiesID);
addPass(&SIOptimizeExecMaskingID);
TargetPassConfig::addPostRegAlloc();
}
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 02d441756c8..bbd06b19d63 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -68,6 +68,7 @@ add_llvm_target(AMDGPUCodeGen
SIDebuggerInsertNops.cpp
SIFixControlFlowLiveIntervals.cpp
SIFixSGPRCopies.cpp
+ SIFixVGPRCopies.cpp
SIFoldOperands.cpp
SIFrameLowering.cpp
SIInsertSkips.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
new file mode 100644
index 00000000000..3d3121788b5
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
@@ -0,0 +1,72 @@
+//===-- SIFixVGPRCopies.cpp - Fix VGPR Copies after regalloc --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Add implicit use of exec to vector register copies.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-fix-vgpr-copies"
+
+namespace {
+
+class SIFixVGPRCopies : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFixVGPRCopies() : MachineFunctionPass(ID) {
+ initializeSIFixVGPRCopiesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return "SI Fix VGPR copies"; }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIFixVGPRCopies, DEBUG_TYPE, "SI Fix VGPR copies", false, false)
+
+char SIFixVGPRCopies::ID = 0;
+
+char &llvm::SIFixVGPRCopiesID = SIFixVGPRCopies::ID;
+
+bool SIFixVGPRCopies::runOnMachineFunction(MachineFunction &MF) {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ if (TII->isVGPRCopy(MI) && !MI.readsRegister(AMDGPU::EXEC, TRI)) {
+ MI.addOperand(MF,
+ MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+ DEBUG(dbgs() << "Add exec use to " << MI);
+ Changed = true;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir
new file mode 100644
index 00000000000..4951e0df4d3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir
@@ -0,0 +1,44 @@
+# RUN: llc -march=amdgcn -start-after=greedy -stop-after=si-optimize-exec-masking -o - %s | FileCheck %s
+# Check that we first do all vector instructions and only then change exec
+# CHECK-DAG: COPY %vgpr10_vgpr11
+# CHECK-DAG: COPY %vgpr12_vgpr13
+# CHECK: %exec = COPY
+
+---
+name: main
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+liveins:
+ - { reg: '%sgpr4_sgpr5' }
+ - { reg: '%sgpr6' }
+ - { reg: '%vgpr0' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 4
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0.entry:
+ liveins: %vgpr3, %vgpr10_vgpr11, %vgpr12_vgpr13
+
+ %vcc = V_CMP_NE_U32_e64 0, killed %vgpr3, implicit %exec
+ %sgpr4_sgpr5 = COPY %exec, implicit-def %exec
+ %sgpr6_sgpr7 = S_AND_B64 %sgpr4_sgpr5, killed %vcc, implicit-def dead %scc
+ %sgpr4_sgpr5 = S_XOR_B64 %sgpr6_sgpr7, killed %sgpr4_sgpr5, implicit-def dead %scc
+ %vgpr61_vgpr62 = COPY %vgpr10_vgpr11
+ %vgpr155_vgpr156 = COPY %vgpr12_vgpr13
+ %exec = S_MOV_B64_term killed %sgpr6_sgpr7
+...
OpenPOWER on IntegriCloud