author    Matt Arsenault <Matthew.Arsenault@amd.com>  2016-05-13 04:15:48 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com>  2016-05-13 04:15:48 +0000
commit    999f7dd84cc1409604719f093a16159fb68476e5 (patch)
tree      fbad46c055432a358c3e89b5790ae1b66cdfbe5d
parent    d1ad58b196064acf5afbdc4c8c58e2ad85c7997a (diff)
AMDGPU: Remove verifier check for scc live ins
We only really need this to be true for SIFixSGPRCopies. I'm not sure there's any way this could happen before that point. Fixes a case where MachineCSE could introduce a cross-block scc use.

llvm-svn: 269391
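For illustration only (not part of the patch): since the no-SCC-live-in property is only needed around SIFixSGPRCopies/moveToVALU, a pass that relies on it could check it locally instead of the machine verifier enforcing it for all of codegen. A minimal sketch, assuming the standard LLVM CodeGen APIs already used by the removed check (MachineBasicBlock::isLiveIn); the helper name is hypothetical:

// Hypothetical helper, not from this commit: report whether SCC is live
// across any block boundary in the function, i.e. the condition the
// removed verifier check rejected. Uses only MachineBasicBlock::isLiveIn,
// the same query the removed code performed.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"

static bool sccIsLiveAcrossBlocks(const llvm::MachineFunction &MF,
                                  unsigned SCCReg /* e.g. AMDGPU::SCC */) {
  for (const llvm::MachineBasicBlock &MBB : MF)
    if (MBB.isLiveIn(SCCReg)) // SCC reaches MBB from a predecessor block
      return true;
  return false;
}

A pass that assumes SCC users sit in the same block as their defs could assert something like this where it actually matters, which is the direction the commit message points at.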
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp   | 10
-rw-r--r--  llvm/test/CodeGen/AMDGPU/uniform-cfg.ll  | 50
2 files changed, 44 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f5cd065a593..826534b823f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1489,16 +1489,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
- // Make sure we don't have SCC live-ins to basic blocks. moveToVALU assumes
- // all SCC users are in the same blocks as their defs.
- const MachineBasicBlock *MBB = MI->getParent();
- if (MI == &MBB->front()) {
- if (MBB->isLiveIn(AMDGPU::SCC)) {
- ErrInfo = "scc register cannot be live across blocks.";
- return false;
- }
- }
-
// Make sure the number of operands is correct.
const MCInstrDesc &Desc = get(Opcode);
if (!Desc.isVariadic() &&
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
index 19df3d16d37..e83a7cb507a 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -226,7 +226,7 @@ ENDIF: ; preds = %IF, %main_body
; SI: s_endpgm
define void @icmp_users_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
bb:
- %tmp = tail call i32 @llvm.r600.read.tidig.x() #0
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%tmp1 = icmp sgt i32 %cond, 0
br i1 %tmp1, label %bb2, label %bb9
@@ -279,7 +279,7 @@ done:
; SI: buffer_store_dword [[ONE]]
define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) {
entry:
- %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%d_cmp = icmp ult i32 %tid, 16
br i1 %d_cmp, label %if, label %endif
@@ -313,7 +313,7 @@ entry:
if:
store i32 0, i32 addrspace(1)* %out
- %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%d_cmp = icmp ult i32 %tid, 16
br i1 %d_cmp, label %if_uniform, label %endif
@@ -325,7 +325,7 @@ endif:
ret void
}
-; SI: {{^}}divergent_if_uniform_if:
+; SI-LABEL: {{^}}divergent_if_uniform_if:
; SI: v_cmp_eq_i32_e32 vcc, 0, v0
; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
@@ -340,7 +340,7 @@ endif:
; SI: s_endpgm
define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
entry:
- %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%d_cmp = icmp eq i32 %tid, 0
br i1 %d_cmp, label %if, label %endif
@@ -360,6 +360,44 @@ exit:
ret void
}
-declare i32 @llvm.r600.read.tidig.x() #0
+; The condition of the branches in the two blocks are
+; uniform. MachineCSE replaces the 2nd condition with the inverse of
+; the first, leaving an scc use in a different block than it was
+; defed.
+
+; SI-LABEL: {{^}}cse_uniform_condition_different_blocks:
+; SI: s_load_dword [[COND:s[0-9]+]]
+; SI: s_cmp_lt_i32 [[COND]], 1
+; SI: s_cbranch_scc1 BB13_3
+
+; SI: BB#1:
+; SI-NOT: cmp
+; SI: buffer_load_dword
+; SI: buffer_store_dword
+; SI: s_cbranch_scc1 BB13_3
+
+; SI: BB13_3:
+; SI: s_endpgm
+define void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
+bb:
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
+ %tmp1 = icmp sgt i32 %cond, 0
+ br i1 %tmp1, label %bb2, label %bb9
+
+bb2: ; preds = %bb
+ %tmp3 = load volatile i32, i32 addrspace(1)* undef
+ store volatile i32 0, i32 addrspace(1)* undef
+ %tmp9 = icmp sle i32 %cond, 0
+ br i1 %tmp9, label %bb9, label %bb7
+
+bb7: ; preds = %bb5
+ store i32 %tmp3, i32 addrspace(1)* %out
+ br label %bb9
+
+bb9: ; preds = %bb8, %bb4
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { readnone }