diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 18 |
1 file changed, 15 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index bbbcf833964..eec9f98d12c 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -226,6 +226,15 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
     Opc = (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
   }
 
+  unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
+  unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
+
+  if (NewOffset0 > NewOffset1) {
+    // Canonicalize the merged instruction so the smaller offset comes first.
+    std::swap(NewOffset0, NewOffset1);
+    std::swap(SubRegIdx0, SubRegIdx1);
+  }
+
   assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
          (NewOffset0 != NewOffset1) &&
          "Computed offset doesn't fit");
@@ -246,9 +255,6 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
       .addMemOperand(*I->memoperands_begin())
       .addMemOperand(*Paired->memoperands_begin());
 
-  unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
-  unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
-
   const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
 
   // Copy to the old destination registers.
@@ -322,6 +328,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
     Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
   }
 
+  if (NewOffset0 > NewOffset1) {
+    // Canonicalize the merged instruction so the smaller offset comes first.
+    std::swap(NewOffset0, NewOffset1);
+    std::swap(Data0, Data1);
+  }
+
   assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
          (NewOffset0 != NewOffset1) &&
          "Computed offset doesn't fit");