diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-15 21:57:11 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-15 21:57:11 +0000 |
| commit | 327188aa1531224245573638298fd0c4e5897122 (patch) | |
| tree | cc8a38e74a10f0ea5bda5642a050b6dd88df9521 | |
| parent | 68e58b4b60eb41f91c1ae0dd92647f91c130436d (diff) | |
| download | bcm5719-llvm-327188aa1531224245573638298fd0c4e5897122.tar.gz bcm5719-llvm-327188aa1531224245573638298fd0c4e5897122.zip | |
AMDGPU: Select branch on undef to uniform scc branch
llvm-svn: 289877
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 9 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll | 13 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/ret_jump.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/skip-if-dead.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll | 6 |
9 files changed, 35 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index cb98ffe0113..ef3b44f7c21 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1458,6 +1458,12 @@ bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
   SDValue Cond = N->getOperand(1);
+  if (Cond.isUndef()) {
+    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
+                         N->getOperand(2), N->getOperand(0));
+    return;
+  }
+
   if (isCBranchSCC(N)) {
     // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
     SelectCode(N);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4d4f4c4407b..bfc67c9542b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1762,6 +1762,15 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
     MI.eraseFromParent();
     return BB;
   }
+  case AMDGPU::SI_BR_UNDEF: {
+    const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+    const DebugLoc &DL = MI.getDebugLoc();
+    MachineInstr *Br = BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC1))
+      .addOperand(MI.getOperand(0));
+    Br->getOperand(1).setIsUndef(true); // read undef SCC
+    MI.eraseFromParent();
+    return BB;
+  }
   default:
     return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index f107f545dc6..54865956ae2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -246,6 +246,12 @@ def SI_KILL_TERMINATOR : SPseudoInstSI <
 } // End Uses = [EXEC], Defs = [EXEC,VCC]
+// Branch on undef scc. Used to avoid intermediate copy from
+// IMPLICIT_DEF to SCC.
+def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> {
+  let isTerminator = 1;
+  let usesCustomInserter = 1;
+}
 def SI_PS_LIVE : PseudoInstSI <
   (outs SReg_64:$dst), (ins),
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll b/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
index 868916ae993..7aa6a53e902 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
@@ -27,7 +27,7 @@
 ; GCN-LABEL: {{^}}sink_ubfe_i32:
 ; GCN-NOT: lshr
-; GCN: s_cbranch_vccnz
+; GCN: s_cbranch_scc1
 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008
 ; GCN: BB0_2:
@@ -121,7 +121,7 @@ ret:
 ; GCN-LABEL: {{^}}sink_ubfe_i16:
 ; GCN-NOT: lshr
 ; VI: s_bfe_u32 s0, s0, 0xc0004
-; GCN: s_cbranch_vccnz
+; GCN: s_cbranch_scc1
 ; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
 ; VI: s_and_b32 s0, s0, 0xff
@@ -175,12 +175,13 @@ ret:
 ; OPT: ret
 ; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:
-; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30
-; GCN: s_cbranch_vccnz BB3_2
+; GCN: s_cbranch_scc1 BB3_2
+; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30
 ; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0xff
 ; GCN: BB3_2:
+; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30
 ; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0x7f
 ; GCN: BB3_3:
@@ -225,7 +226,7 @@ ret:
 ; GCN-LABEL: {{^}}sink_ubfe_i64_low32:
-; GCN: s_cbranch_vccnz BB4_2
+; GCN: s_cbranch_scc1 BB4_2
 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f
@@ -273,7 +274,7 @@ ret:
 ; OPT: ret
 ; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
-; GCN: s_cbranch_vccnz BB5_2
+; GCN: s_cbranch_scc1 BB5_2
 ; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003
 ; GCN: BB5_2:
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
index 5a2130006ab..e85db65e742 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll
@@ -4,7 +4,7 @@
 ; SILowerI1Copies was not handling IMPLICIT_DEF
 ; SI-LABEL: {{^}}br_implicit_def:
 ; SI: BB#0:
-; SI-NEXT: s_cbranch_vccnz
+; SI-NEXT: s_cbranch_scc1
 define void @br_implicit_def(i32 addrspace(1)* %out, i32 %arg) #0 {
 bb:
   br i1 undef, label %bb1, label %bb2
diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
index f7380cd9692..51ca6049241 100644
--- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
@@ -5,7 +5,7 @@
 ; Mask should be in original state after executed unreachable block
 ; GCN-LABEL: {{^}}main:
-; GCN: s_cbranch_vccnz [[RET_BB:BB[0-9]+_[0-9]+]]
+; GCN: s_cbranch_scc1 [[RET_BB:BB[0-9]+_[0-9]+]]
 ; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
 ; GCN-NEXT: s_xor_b64 [[XOR_EXEC:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE_EXEC]]
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
index c61f2b0744d..ef616eb6380 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
@@ -37,7 +37,7 @@ bb5: ; preds = %bb3, %bb1
 ; OPT-NOT: call i1 @llvm.amdgcn.loop
 ; GCN-LABEL: {{^}}annotate_ret_noloop:
-; GCN: s_cbranch_vccnz
+; GCN: s_cbranch_scc1
 ; GCN: s_endpgm
 ; GCN: .Lfunc_end1
 define void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index 33f5e98fcc7..60cee7a3499 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -263,7 +263,7 @@ exit:
 ; CHECK-NEXT: s_endpgm
 ; CHECK: [[KILLBB:BB[0-9]+_[0-9]+]]:
-; CHECK-NEXT: s_cbranch_vccz [[PHIBB:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: s_cbranch_scc0 [[PHIBB:BB[0-9]+_[0-9]+]]
 ; CHECK: [[PHIBB]]:
 ; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]]
diff --git a/llvm/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll b/llvm/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll
index f09f73c38b4..d96ee6d21ce 100644
--- a/llvm/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll
+++ b/llvm/test/CodeGen/AMDGPU/unhandled-loop-condition-assertion.ll
@@ -40,7 +40,7 @@ for.end: ; preds = %for.body, %entry
 ; COMMON-LABEL: {{^}}branch_false:
 ; SI: s_cbranch_vccnz
-; SI: s_cbranch_vccnz
+; SI: s_cbranch_scc1
 ; SI: s_endpgm
 define void @branch_false(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
 entry:
@@ -76,8 +76,8 @@ for.end: ; preds = %for.body, %entry
 }
 ; COMMON-LABEL: {{^}}branch_undef:
-; SI: s_cbranch_vccnz
-; SI: s_cbranch_vccnz
+; SI: s_cbranch_scc1
+; SI: s_cbranch_scc1
 ; SI: s_endpgm
 define void @branch_undef(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
 entry:

