diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-11-15 20:22:55 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-11-15 20:22:55 +0000 |
commit | d4bb5e483118cfa2634a21689afa217134d98eab (patch) | |
tree | bda02df55583ab0a267a8cdc06d2cb28e5928c3f /llvm/lib/Target/AMDGPU | |
parent | 7004d6664efde9d1148ed677649593f989cc6056 (diff) | |
download | bcm5719-llvm-d4bb5e483118cfa2634a21689afa217134d98eab.tar.gz bcm5719-llvm-d4bb5e483118cfa2634a21689afa217134d98eab.zip |
AMDGPU: Enable store clustering
Also respect the TII hooks (enableClusterLoads / enableClusterStores) when
adding the cluster mutations, as the generic code does, in case we later want a flag to disable clustering.
llvm-svn: 287021
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 9 |
3 files changed, 13 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 42c7b967f3e..f88bb69c3a7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -36,6 +36,10 @@ bool AMDGPUInstrInfo::enableClusterLoads() const {
   return true;
 }
 
+bool AMDGPUInstrInfo::enableClusterStores() const {
+  return true;
+}
+
 // FIXME: This behaves strangely. If, for example, you have 32 load + stores,
 // the first 16 loads will be interleaved with the stores, and the next 16 will
 // be clustered as expected. It should really split into 2 16 store batches.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index de834f453a6..46e985dc8fd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -40,6 +40,7 @@ public:
   explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
 
   bool enableClusterLoads() const override;
+  bool enableClusterStores() const override;
 
   bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                int64_t Offset1, int64_t Offset2,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 877dacd06f7..baf4d192c57 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -102,7 +102,14 @@ static ScheduleDAGInstrs *
 createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
   ScheduleDAGMILive *DAG =
       new ScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
-  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
+  if (TII->enableClusterLoads())
+    DAG->addMutation(createLoadClusterDAGMutation(TII, DAG->TRI));
+
+  if (TII->enableClusterStores())
+    DAG->addMutation(createStoreClusterDAGMutation(TII, DAG->TRI));
+
   return DAG;
 }
 
|