summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2019-05-03 14:40:10 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2019-05-03 14:40:10 +0000
commit0446fbe45ecf417cc05cddc198a3503ea8d2bc81 (patch)
tree67d58564e3c6f02039d30c6fe82a776d90ffb0a3
parentbc876df3a5f0f25b7c43a90e4c35f553ca5362df (diff)
downloadbcm5719-llvm-0446fbe45ecf417cc05cddc198a3503ea8d2bc81.tar.gz
bcm5719-llvm-0446fbe45ecf417cc05cddc198a3503ea8d2bc81.zip
AMDGPU: Replace shrunk instruction with dummy implicit_def
This was broken if the original operand was killed. The kill flag would appear on both instructions, and fail the verifier. Keep the kill flag, but remove the operands from the old instruction. This has an added benefit of really reducing the use count for future folds. Ideally the pass would be structured more like what PeepholeOptimizer does to avoid this hack to avoid breaking instruction iterators. llvm-svn: 359891
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp12
-rw-r--r--llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir56
2 files changed, 64 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 8db2ee21603..dcfc39e6755 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -233,8 +233,6 @@ static bool updateOperand(FoldCandidate &Fold,
const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
- const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
- unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
@@ -244,9 +242,15 @@ static bool updateOperand(FoldCandidate &Fold,
}
// Keep the old instruction around to avoid breaking iterators, but
- // replace the outputs with dummy registers.
+ // replace it with a dummy instruction to remove uses.
+ //
+ // FIXME: We should not invert how this pass looks at operands to avoid
+ // this. Should track set of foldable movs instead of looking for uses
+ // when looking at a use.
Dst0.setReg(NewReg0);
- Dst1.setReg(NewReg1);
+ for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
+ MI->RemoveOperand(I);
+ MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
if (Fold.isCommuted())
TII.commuteInstruction(*Inst32, false);
diff --git a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
index 5e58210571d..865c84ad8fc 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
@@ -590,3 +590,59 @@ body: |
S_ENDPGM 0, implicit %2
...
+
+---
+name: shrink_add_kill_flags_src0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GCN-LABEL: name: shrink_add_kill_flags_src0
+ ; GCN: liveins: $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+ ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 killed [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 killed %1, %0, 0, implicit $exec
+ S_ENDPGM 0, implicit %2
+...
+
+---
+name: shrink_add_kill_flags_src1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GCN-LABEL: name: shrink_add_kill_flags_src1
+ ; GCN: liveins: $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+ ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], killed [[COPY]], implicit-def $vcc, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %1, killed %0, 0, implicit $exec
+ S_ENDPGM 0, implicit %2
+...
+
+---
+name: shrink_addc_kill_flags_src2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vcc
+ ; GCN-LABEL: name: shrink_addc_kill_flags_src2
+ ; GCN: liveins: $vgpr0, $vcc
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+ ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $vcc
+ ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[V_MOV_B32_e32_]], [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GCN: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]]
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+ %2:sreg_64_xexec = COPY $vcc
+ %3:vgpr_32, %4:sreg_64_xexec = V_ADDC_U32_e64 %1, %0, %2, 0, implicit $exec
+ S_ENDPGM 0, implicit %3
+...
OpenPOWER on IntegriCloud