summary refs log tree commit diff stats
path: root/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp 18
1 file changed, 15 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index bbbcf833964..eec9f98d12c 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -226,6 +226,15 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
Opc = (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
}
+ unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
+ unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
+
+ if (NewOffset0 > NewOffset1) {
+ // Canonicalize the merged instruction so the smaller offset comes first.
+ std::swap(NewOffset0, NewOffset1);
+ std::swap(SubRegIdx0, SubRegIdx1);
+ }
+
assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
(NewOffset0 != NewOffset1) &&
"Computed offset doesn't fit");
@@ -246,9 +255,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
- unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
- unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
-
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
// Copy to the old destination registers.
@@ -322,6 +328,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
}
+ if (NewOffset0 > NewOffset1) {
+ // Canonicalize the merged instruction so the smaller offset comes first.
+ std::swap(NewOffset0, NewOffset1);
+ std::swap(Data0, Data1);
+ }
+
assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
(NewOffset0 != NewOffset1) &&
"Computed offset doesn't fit");
OpenPOWER on IntegriCloud