AMDGPU: Enable store clustering

Also respect the TII hooks for these, as the generic code does, in case we want a flag later to disable this.

llvm-svn: 287021
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-11-15 20:22:55 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-11-15 20:22:55 +0000
commit: d4bb5e483118cfa2634a21689afa217134d98eab (patch)
tree: bda02df55583ab0a267a8cdc06d2cb28e5928c3f /llvm/lib/Target/AMDGPU
parent: 7004d6664efde9d1148ed677649593f989cc6056 (diff)
download: bcm5719-llvm-d4bb5e483118cfa2634a21689afa217134d98eab.tar.gz
bcm5719-llvm-d4bb5e483118cfa2634a21689afa217134d98eab.zip
3 files changed, 13 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 42c7b967f3e..f88bb69c3a7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -36,6 +36,10 @@ bool AMDGPUInstrInfo::enableClusterLoads() const {
   return true;
 }
 
+bool AMDGPUInstrInfo::enableClusterStores() const {
+  return true;
+}
+
 // FIXME: This behaves strangely. If, for example, you have 32 load + stores,
 // the first 16 loads will be interleaved with the stores, and the next 16 will
 // be clustered as expected. It should really split into 2 16 store batches.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index de834f453a6..46e985dc8fd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -40,6 +40,7 @@ public:
   explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
 
   bool enableClusterLoads() const override;
+  bool enableClusterStores() const override;
 
   bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                int64_t Offset1, int64_t Offset2,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 877dacd06f7..baf4d192c57 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -102,7 +102,14 @@ static ScheduleDAGInstrs *
 createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
   ScheduleDAGMILive *DAG =
       new ScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
-  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
+  if (TII->enableClusterLoads())
+    DAG->addMutation(createLoadClusterDAGMutation(TII, DAG->TRI));
+
+  if (TII->enableClusterStores())
+    DAG->addMutation(createStoreClusterDAGMutation(TII, DAG->TRI));
+
   return DAG;
 }
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2016-11-15 20:22:55 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2016-11-15 20:22:55 +0000
commit	d4bb5e483118cfa2634a21689afa217134d98eab (patch)
tree	bda02df55583ab0a267a8cdc06d2cb28e5928c3f /llvm/lib/Target/AMDGPU
parent	7004d6664efde9d1148ed677649593f989cc6056 (diff)
download	bcm5719-llvm-d4bb5e483118cfa2634a21689afa217134d98eab.tar.gz bcm5719-llvm-d4bb5e483118cfa2634a21689afa217134d98eab.zip