diff options
-rw-r--r-- | llvm/lib/CodeGen/MachineBasicBlock.cpp | 14 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir | 70 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll | 4 |
3 files changed, 82 insertions, 6 deletions
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 38e8369dc73..62c360c5100 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1439,6 +1439,20 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, } } + // If we reached the end, it is safe to clobber Reg at the end of a block if + // no successor has it live in. + if (I == end()) { + for (MachineBasicBlock *S : successors()) { + for (MCSubRegIterator SubReg(Reg, TRI, /*IncludeSelf*/true); + SubReg.isValid(); ++SubReg) { + if (S->isLiveIn(*SubReg)) + return LQR_Live; + } + } + + return LQR_Dead; + } + // At this point we have no idea of the liveness of the register. return LQR_Unknown; } diff --git a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir index 204644227df..3b1ca762347 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir @@ -318,14 +318,74 @@ body: | --- +# We know this is OK because vcc isn't live out of the block. 
+ +name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout +tracksRegLiveness: true + +body: | + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec + ; GCN: bb.1: + ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]] + bb.0: + successors: %bb.1 + + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + S_NOP 0 + %0:sreg_32_xm0 = S_MOV_B32 12345 + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_NOP 0 + S_NOP 0 + + bb.1: + S_ENDPGM implicit %2 + +... + +--- + # We know this is OK because vcc isn't live out of the block, even -# though it had a defined value +# though it had a defined but unused value
-name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout +name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout_dead_vcc_def tracksRegLiveness: true body: | - ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout + ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_no_liveout_dead_vcc_def ; GCN: bb.0: ; GCN: successors: %bb.1(0x80000000) ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345 @@ -336,10 +396,12 @@ body: | bb.0: successors: %bb.1 - $vcc = S_MOV_B64 -1 + S_NOP 0, implicit-def $vcc %0:sreg_32_xm0 = S_MOV_B32 12345 %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec + S_NOP 0 + S_NOP 0 bb.1: S_ENDPGM implicit %2 diff --git a/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll b/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll index 59ddd6177d7..7645dad73e6 100644 --- a/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll +++ b/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll @@ -10,8 +10,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() readnone ; FIXME: SIShrinkInstructions should force immediate fold. ; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_0: -; SI: s_movk_i32 [[K:s[0-9]+]], 0x18f -; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, [[K]], v{{[0-9]+}} +; SI: v_mov_b32_e32 [[V_VAL:v[0-9]+]], s +; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x18f, [[V_VAL]] ; SI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc define amdgpu_kernel void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %s.val) { %v.val = load volatile i32, i32 addrspace(1)* %in |