summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/MachineBasicBlock.cpp14
-rw-r--r--llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir70
-rw-r--r--llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll4
3 files changed, 82 insertions, 6 deletions
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 38e8369dc73..62c360c5100 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1439,6 +1439,20 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
}
}
+ // If we reached the end, it is safe to clobber Reg at the end of a block if
+ // no successor has it live in.
+ if (I == end()) {
+ for (MachineBasicBlock *S : successors()) {
+ for (MCSubRegIterator SubReg(Reg, TRI, /*IncludeSelf*/true);
+ SubReg.isValid(); ++SubReg) {
+ if (S->isLiveIn(*SubReg))
+ return LQR_Live;
+ }
+ }
+
+ return LQR_Dead;
+ }
+
// At this point we have no idea of the liveness of the register.
return LQR_Unknown;
}
diff --git a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
index 204644227df..3b1ca762347 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
@@ -318,14 +318,74 @@ body: |
---
+# We know this is OK because vcc isn't live out of the block.
+
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout
+tracksRegLiveness: true
+
+body: |
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x80000000)
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+ ; GCN: bb.1:
+ ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
+ bb.0:
+ successors: %bb.1
+
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ %0:sreg_32_xm0 = S_MOV_B32 12345
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+ S_NOP 0
+ S_NOP 0
+
+ bb.1:
+ S_ENDPGM implicit %2
+
+...
+
+---
+
# We know this is OK because vcc isn't live out of the block, even
-# though it had a defined value
+# though it had a defined but unused value.
-name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout
+name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout_dead_vcc_def
tracksRegLiveness: true
body: |
- ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout
+ ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout_dead_vcc_def
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
@@ -336,10 +396,12 @@ body: |
bb.0:
successors: %bb.1
- $vcc = S_MOV_B64 -1
+ S_NOP 0, implicit-def $vcc
%0:sreg_32_xm0 = S_MOV_B32 12345
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
+ S_NOP 0
+ S_NOP 0
bb.1:
S_ENDPGM implicit %2
diff --git a/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll b/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
index 59ddd6177d7..7645dad73e6 100644
--- a/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
@@ -10,8 +10,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() readnone
; FIXME: SIShrinkInstructions should force immediate fold.
; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_0:
-; SI: s_movk_i32 [[K:s[0-9]+]], 0x18f
-; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, [[K]], v{{[0-9]+}}
+; SI: v_mov_b32_e32 [[V_VAL:v[0-9]+]], s
+; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x18f, [[V_VAL]]
; SI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
define amdgpu_kernel void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %s.val) {
%v.val = load volatile i32, i32 addrspace(1)* %in
OpenPOWER on IntegriCloud