| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-05-13 04:15:48 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-05-13 04:15:48 +0000 |
| commit | 999f7dd84cc1409604719f093a16159fb68476e5 | |
| tree | fbad46c055432a358c3e89b5790ae1b66cdfbe5d | |
| parent | d1ad58b196064acf5afbdc4c8c58e2ad85c7997a | |
AMDGPU: Remove verifier check for scc live ins
We only really need this to be true for SIFixSGPRCopies. I'm not
sure there's any way an SCC live-in could appear before that point.

Fixes a case where MachineCSE could introduce a cross-block scc use.
llvm-svn: 269391
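For context, the pattern the removed verifier check would have rejected looks roughly like the following. This is a hand-written sketch of the SGPR code after MachineCSE, not compiler output; the block labels and the `s0` register are illustrative only, and it mirrors the `cse_uniform_condition_different_blocks` test added below.

```asm
bb0:
  s_cmp_lt_i32 s0, 1      ; defines SCC: (s0 < 1), the inverse of (s0 > 0)
  s_cbranch_scc1 bb_exit  ; SCC consumed in its defining block: fine
bb1:
  ; the second compare (s0 <= 0) is identical to (s0 < 1), so MachineCSE
  ; deletes it; this branch now reads the SCC value defined back in bb0
  s_cbranch_scc1 bb_exit
```

Per the commit message, only SIFixSGPRCopies actually relies on SCC defs and uses staying in one block, so rejecting this in the generic verifier was stricter than necessary.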
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp  | 10
 llvm/test/CodeGen/AMDGPU/uniform-cfg.ll | 50
 2 files changed, 44 insertions(+), 16 deletions(-)
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f5cd065a593..826534b823f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1489,16 +1489,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
   int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
   int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
 
-  // Make sure we don't have SCC live-ins to basic blocks. moveToVALU assumes
-  // all SCC users are in the same blocks as their defs.
-  const MachineBasicBlock *MBB = MI->getParent();
-  if (MI == &MBB->front()) {
-    if (MBB->isLiveIn(AMDGPU::SCC)) {
-      ErrInfo = "scc register cannot be live across blocks.";
-      return false;
-    }
-  }
-
   // Make sure the number of operands is correct.
   const MCInstrDesc &Desc = get(Opcode);
   if (!Desc.isVariadic() &&
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
index 19df3d16d37..e83a7cb507a 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -226,7 +226,7 @@ ENDIF: ; preds = %IF, %main_body
 ; SI: s_endpgm
 define void @icmp_users_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
 bb:
-  %tmp = tail call i32 @llvm.r600.read.tidig.x() #0
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tmp1 = icmp sgt i32 %cond, 0
   br i1 %tmp1, label %bb2, label %bb9
 
@@ -279,7 +279,7 @@ done:
 ; SI: buffer_store_dword [[ONE]]
 define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) {
 entry:
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %d_cmp = icmp ult i32 %tid, 16
   br i1 %d_cmp, label %if, label %endif
 
@@ -313,7 +313,7 @@ entry:
 
 if:
   store i32 0, i32 addrspace(1)* %out
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %d_cmp = icmp ult i32 %tid, 16
   br i1 %d_cmp, label %if_uniform, label %endif
 
@@ -325,7 +325,7 @@ endif:
   ret void
 }
 
-; SI: {{^}}divergent_if_uniform_if:
+; SI-LABEL: {{^}}divergent_if_uniform_if:
 ; SI: v_cmp_eq_i32_e32 vcc, 0, v0
 ; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
 ; SI: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
@@ -340,7 +340,7 @@
 ; SI: s_endpgm
 define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
 entry:
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %d_cmp = icmp eq i32 %tid, 0
   br i1 %d_cmp, label %if, label %endif
 
@@ -360,6 +360,44 @@ exit:
   ret void
 }
 
-declare i32 @llvm.r600.read.tidig.x() #0
+; The condition of the branches in the two blocks are
+; uniform. MachineCSE replaces the 2nd condition with the inverse of
+; the first, leaving an scc use in a different block than it was
+; defed.
+
+; SI-LABEL: {{^}}cse_uniform_condition_different_blocks:
+; SI: s_load_dword [[COND:s[0-9]+]]
+; SI: s_cmp_lt_i32 [[COND]], 1
+; SI: s_cbranch_scc1 BB13_3
+
+; SI: BB#1:
+; SI-NOT: cmp
+; SI: buffer_load_dword
+; SI: buffer_store_dword
+; SI: s_cbranch_scc1 BB13_3
+
+; SI: BB13_3:
+; SI: s_endpgm
+define void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
+bb:
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+  %tmp1 = icmp sgt i32 %cond, 0
+  br i1 %tmp1, label %bb2, label %bb9
+
+bb2: ; preds = %bb
+  %tmp3 = load volatile i32, i32 addrspace(1)* undef
+  store volatile i32 0, i32 addrspace(1)* undef
+  %tmp9 = icmp sle i32 %cond, 0
+  br i1 %tmp9, label %bb9, label %bb7
+
+bb7: ; preds = %bb5
+  store i32 %tmp3, i32 addrspace(1)* %out
+  br label %bb9
+
+bb9: ; preds = %bb8, %bb4
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 attributes #0 = { readnone }
```