AMDGPU: Add macro fusion schedule DAG mutation

Try to increase opportunities to shrink VCC uses.

llvm-svn: 307313
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-07-06 20:57:05 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-07-06 20:57:05 +0000
commit: 9aa45f047f303b6484afce6716472b3b1f510c7e (patch)
tree: d25023848a172579a8d26d9860316edaffcee5d0 /llvm/lib
parent: a81198d82de20e14efce44826e302785bfda093a (diff)
download: bcm5719-llvm-9aa45f047f303b6484afce6716472b3b1f510c7e.tar.gz
bcm5719-llvm-9aa45f047f303b6484afce6716472b3b1f510c7e.zip
4 files changed, 86 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
new file mode 100644
index 00000000000..7263ba73d15
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
@@ -0,0 +1,64 @@
+//===--- AMDGPUMacroFusion.cpp - AMDGPU Macro Fusion ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the AMDGPU implementation of the DAG scheduling
+///  mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMacroFusion.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+
+#include "llvm/CodeGen/MacroFusion.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Decide whether FirstMI and SecondMI should be kept back to back by the
+/// scheduler. A null FirstMI asks the broader question of whether SecondMI
+/// could be the second half of any fused pair at all.
+/// \returns true if the (possibly open-ended) pair is a fusion candidate.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_,
+                                   const TargetSubtargetInfo &TSI,
+                                   const MachineInstr *FirstMI,
+                                   const MachineInstr &SecondMI) {
+  // Only these opcodes are accepted as the second instruction of a pair;
+  // they are the ones whose src2 operand is inspected below.
+  const unsigned Opc = SecondMI.getOpcode();
+  const bool ReadsCondInSrc2 = Opc == AMDGPU::V_ADDC_U32_e64 ||
+                               Opc == AMDGPU::V_SUBB_U32_e64 ||
+                               Opc == AMDGPU::V_CNDMASK_B32_e64;
+  if (!ReadsCondInSrc2)
+    return false;
+
+  // Try to cluster defs of condition registers to their uses. This improves
+  // the chance VCC will be available which will allow shrinking to VOP2
+  // encodings.
+  if (!FirstMI)
+    return true;
+
+  // Fuse only when FirstMI defines the register SecondMI consumes as src2.
+  const SIInstrInfo &SII = static_cast<const SIInstrInfo &>(TII_);
+  const MachineOperand *CondOp =
+      SII.getNamedOperand(SecondMI, AMDGPU::OpName::src2);
+  return FirstMI->definesRegister(CondOp->getReg());
+}
+
+} // end namespace
+
+
+/// Create the schedule DAG mutation driven by shouldScheduleAdjacent. Defined
+/// through its qualified name, so it must match the declaration in
+/// AMDGPUMacroFusion.h instead of silently introducing a new overload.
+std::unique_ptr<ScheduleDAGMutation>
+llvm::createAMDGPUMacroFusionDAGMutation() {
+  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.h b/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.h
new file mode 100644
index 00000000000..844958580a6
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.h
@@ -0,0 +1,27 @@
+//===- AMDGPUMacroFusion.h - AMDGPU Macro Fusion ----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACROFUSION_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACROFUSION_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+#include <memory>
+
+namespace llvm {
+
+/// Create the schedule DAG mutation that applies AMDGPU macro fusion.
+///
+/// Note that you have to add:
+///   DAG.addMutation(createAMDGPUMacroFusionDAGMutation());
+/// to AMDGPUPassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUMacroFusionDAGMutation();
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUMACROFUSION_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 87d8e714d66..dc868f010d8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -19,6 +19,7 @@
 #include "AMDGPUCallLowering.h"
 #include "AMDGPUInstructionSelector.h"
 #include "AMDGPULegalizerInfo.h"
+#include "AMDGPUMacroFusion.h"
 #include "AMDGPUTargetObjectFile.h"
 #include "AMDGPUTargetTransformInfo.h"
 #include "GCNIterativeScheduler.h"
@@ -173,6 +174,7 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
     new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
   return DAG;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 917d9cfa690..971208c5db8 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -47,6 +47,7 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUIntrinsicInfo.cpp
   AMDGPUISelDAGToDAG.cpp
   AMDGPULowerIntrinsics.cpp
+  AMDGPUMacroFusion.cpp
   AMDGPUMCInstLower.cpp
   AMDGPUMachineCFGStructurizer.cpp
   AMDGPUMachineFunction.cpp
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2017-07-06 20:57:05 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2017-07-06 20:57:05 +0000
commit	9aa45f047f303b6484afce6716472b3b1f510c7e (patch)
tree	d25023848a172579a8d26d9860316edaffcee5d0 /llvm/lib
parent	a81198d82de20e14efce44826e302785bfda093a (diff)
download	bcm5719-llvm-9aa45f047f303b6484afce6716472b3b1f510c7e.tar.gz bcm5719-llvm-9aa45f047f303b6484afce6716472b3b1f510c7e.zip