summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5
-rw-r--r-- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 9
2 files changed, 10 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 51be81a7a81..43c83efe47b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -822,6 +822,11 @@ public:
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
+ // \returns true if the subtarget supports DWORDX3 load/store instructions.
+ bool hasDwordx3LoadStores() const {
+ return CIInsts;
+ }
+
bool hasSMovFedHazard() const {
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 52bbe5c0345..be291b12730 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -160,7 +160,7 @@ private:
bool OptimizeAgain;
static bool offsetsCanBeCombined(CombineInfo &CI);
- static bool widthsFit(const CombineInfo &CI);
+ static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI);
static unsigned getNewOpcode(const CombineInfo &CI);
static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI);
const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI);
@@ -367,11 +367,12 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
return false;
}
-bool SILoadStoreOptimizer::widthsFit(const CombineInfo &CI) {
+bool SILoadStoreOptimizer::widthsFit(const GCNSubtarget &STM,
+ const CombineInfo &CI) {
const unsigned Width = (CI.Width0 + CI.Width1);
switch (CI.InstClass) {
default:
- return Width <= 4;
+ return (Width <= 4) && (STM.hasDwordx3LoadStores() || (Width != 3));
case S_BUFFER_LOAD_IMM:
switch (Width) {
default:
@@ -645,7 +646,7 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
// We also need to go through the list of instructions that we plan to
// move and make sure they are all safe to move down past the merged
// instruction.
- if (widthsFit(CI) && offsetsCanBeCombined(CI))
+ if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI))
if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
return true;
}
OpenPOWER on IntegriCloud