diff options
-rw-r--r-- | llvm/lib/CodeGen/MachineBasicBlock.cpp | 14 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir | 70 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll | 4 |
3 files changed, 82 insertions, 6 deletions
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 38e8369dc73..62c360c5100 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1439,6 +1439,20 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, } } + // If we reached the end, it is safe to clobber Reg at the end of a block if + // no successor has it live in. + if (I == end()) { + for (MachineBasicBlock *S : successors()) { + for (MCSubRegIterator SubReg(Reg, TRI, /*IncludeSelf*/true); + SubReg.isValid(); ++SubReg) { + if (S->isLiveIn(*SubReg)) + return LQR_Live; + } + } + + return LQR_Dead; + } + // At this point we have no idea of the liveness of the register. return LQR_Unknown; } diff --git a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir index 204644227df..3b1ca762347 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir @@ -318,14 +318,74 @@ body: | --- +# We know this is OK because vcc isn't live out of the block. 
+ +name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout +tracksRegLiveness: true + +body: | + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: bb.1: + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + bb.0: + successors: %bb.1 + + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_NOP 0 + S_NOP 0 + + bb.1: + S_ENDPGM implicit %2 + +... + +--- + # We know this is OK because vcc isn't live out of the block, even -# though it had a defined value +# though it had a defined but unused value
-name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout +name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout_dead_vcc_def tracksRegLiveness: true body: | - ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout_dead_vcc_def ; GCN: bb.0: ; GCN: successors: %bb.1(0x80000000) ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 @@ -336,10 +396,12 @@ body: | bb.0: successors: %bb.1 - $vcc = S_MOV_B64 -1 + S_NOP 0, implicit-def $vcc %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_NOP 0 + S_NOP 0 bb.1: S_ENDPGM implicit %2 diff --git a/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll b/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll index 59ddd6177d7..7645dad73e6 100644 --- a/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll +++ b/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll @@ -10,8 +10,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() readnone ; FIXME: SIShrinkInstructions should force immediate fold. ; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_0: -; SI: s_movk_i32 [[K:s[0-9]+]], 0x18f -; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, [[K]], v{{[0-9]+}} +; SI: v_mov_b32_e32 [[V_VAL:v[0-9]+]], s +; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x18f, [[V_VAL]] ; SI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc define amdgpu_kernel void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %s.val) { %v.val = load volatile i32, i32 addrspace(1)* %in |