diff options
-rw-r--r-- | llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll | 19 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll | 25 |
3 files changed, 36 insertions, 12 deletions
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 8feb18b4d03..16ae90de66e 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1567,14 +1567,14 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, if (!IsEarlyClobber) { // Replace other (un-tied) uses of regB with LastCopiedReg. for (MachineOperand &MO : MI->operands()) { - if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB && + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { if (MO.isKill()) { MO.setIsKill(false); RemovedKillFlag = true; } MO.setReg(LastCopiedReg); - MO.setSubReg(0); + MO.setSubReg(MO.getSubReg()); } } } diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll new file mode 100644 index 00000000000..877956be308 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll @@ -0,0 +1,19 @@ +; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck %s + +; FIXME: Merge into indirect-addressing-si.ll + +; Make sure that TwoAddressInstructions keeps src0 as subregister sub0 +; of the tied implicit use and def of the super register. + +; CHECK-LABEL: {{^}}insert_wo_offset: +; CHECK: s_load_dword [[IN:s[0-9]+]] +; CHECK: s_mov_b32 m0, [[IN]] +; CHECK: v_movreld_b32_e32 v[[ELT0:[0-9]+]] +; CHECK-NEXT: buffer_store_dwordx4 v{{\[}}[[ELT0]]: +define void @insert_wo_offset(<4 x float> addrspace(1)* %out, i32 %in) { +entry: + %ins = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in + store <4 x float> %ins, <4 x float> addrspace(1)* %out + ret void +} + diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll index 5f5978c87b2..bf230b2db04 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -125,27 +125,32 @@ entry: } ; CHECK-LABEL: {{^}}insert_w_offset: -; CHECK: s_load_dword [[IN:s[0-9]+]] -; CHECK: s_mov_b32 m0, [[IN]] -; CHECK: v_movreld_b32_e32 -define void @insert_w_offset(float addrspace(1)* %out, i32 %in) { +; CHECK-DAG: s_load_dword [[IN:s[0-9]+]] +; CHECK-DAG: s_mov_b32 m0, [[IN]] +; CHECK-DAG: v_mov_b32_e32 v[[ELT0:[0-9]+]], 1.0 +; CHECK-DAG: v_mov_b32_e32 v[[ELT1:[0-9]+]], 2.0 +; CHECK-DAG: v_mov_b32_e32 v[[ELT2:[0-9]+]], 0x40400000 +; CHECK-DAG: v_mov_b32_e32 v[[ELT3:[0-9]+]], 4.0 +; CHECK-DAG: v_mov_b32_e32 v[[INS:[0-9]+]], 0x40a00000 +; CHECK: v_movreld_b32_e32 v[[ELT1]], v[[INS]] +; CHECK: buffer_store_dwordx4 v{{\[}}[[ELT0]]:[[ELT3]]{{\]}} +define void @insert_w_offset(<4 x float> addrspace(1)* %out, i32 %in) { entry: %0 = add i32 %in, 1 %1 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %0 - %2 = extractelement <4 x float> %1, i32 2 - store float %2, float addrspace(1)* %out + store <4 x float> %1, <4 x float> addrspace(1)* %out ret void } ; CHECK-LABEL: {{^}}insert_wo_offset: ; CHECK: s_load_dword [[IN:s[0-9]+]] ; CHECK: s_mov_b32 m0, [[IN]] -; CHECK: v_movreld_b32_e32 -define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) { +; CHECK: v_movreld_b32_e32 v[[ELT0:[0-9]+]] +; CHECK: buffer_store_dwordx4 v{{\[}}[[ELT0]]: +define void @insert_wo_offset(<4 x float> addrspace(1)* %out, i32 %in) { entry: %0 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in - %1 = extractelement <4 x float> %0, i32 2 - store float %1, float addrspace(1)* %out + store <4 x float> %0, <4 x float> addrspace(1)* %out ret void } |