diff options
author | Michael Liao <michael.hliao@gmail.com> | 2019-05-27 18:26:29 +0000 |
---|---|---|
committer | Michael Liao <michael.hliao@gmail.com> | 2019-05-27 18:26:29 +0000 |
commit | 9c70c574b4fec75f4c8a530891e6e412e7ad77be (patch) | |
tree | 061032339bf69b8552f61964d75564ca7ee46f46 | |
parent | f4040a0dd81b1bb4d1a4704492d1642c09190f56 (diff) | |
download | bcm5719-llvm-9c70c574b4fec75f4c8a530891e6e412e7ad77be.tar.gz bcm5719-llvm-9c70c574b4fec75f4c8a530891e6e412e7ad77be.zip |
[SelectionDAG] Enhance the simplification of `copyto` from `implicit-def`.
Summary:
- The current implementation simplifies the case where the source of
`copyto` is `implicit-def`ed. However, it only works when that
`implicit-def` is single-used since it detects that from
`implicit-def` and cannot determine which destination vreg should be
used if there are multiple uses.
- This patch moves that detection to the point where `copyto` is being
emitted. If that `copyto`'s source is defined by `implicit-def`, it
simplifies it. Hence, it works even when that `implicit-def` has multiple uses.
- Apart from simplifying the internal IR, it won't improve the quality of
code generation. However, it helps to detect `implicit-def` in a
straightforward manner in some passes, such as `si-i1-copies`. A test
case is added.
Reviewers: sunfish, nhaehnle
Subscribers: jvesely, hiraditya, asbirlea, llvm-commits, yaxunl
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62342
llvm-svn: 361777
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 40 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll | 19 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll | 22 |
4 files changed, 55 insertions, 31 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 4b78d1bb6b1..8533a94c48a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -186,24 +186,6 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, assert(isNew && "Node emitted out of order - early"); } -/// getDstOfCopyToRegUse - If the only use of the specified result number of -/// node is a CopyToReg, return its destination register. Return 0 otherwise. -unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, - unsigned ResNo) const { - if (!Node->hasOneUse()) - return 0; - - SDNode *User = *Node->use_begin(); - if (User->getOpcode() == ISD::CopyToReg && - User->getOperand(2).getNode() == Node && - User->getOperand(2).getResNo() == ResNo) { - unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return Reg; - } - return 0; -} - void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, @@ -286,14 +268,11 @@ unsigned InstrEmitter::getVR(SDValue Op, if (Op.isMachineOpcode() && Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. - unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); // IMPLICIT_DEF can produce any type of result so its MCInstrDesc // does not include operand register class info. 
- if (!VReg) { - const TargetRegisterClass *RC = TLI->getRegClassFor( - Op.getSimpleValueType(), Op.getNode()->isDivergent()); - VReg = MRI->createVirtualRegister(RC); - } + const TargetRegisterClass *RC = TLI->getRegClassFor( + Op.getSimpleValueType(), Op.getNode()->isDivergent()); + unsigned VReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, Op.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), VReg); return VReg; @@ -1011,14 +990,23 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case ISD::TokenFactor: // fall thru break; case ISD::CopyToReg: { - unsigned SrcReg; + unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); SDValue SrcVal = Node->getOperand(2); + if (TargetRegisterInfo::isVirtualRegister(DestReg) && + SrcVal.isMachineOpcode() && + SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { + // Instead building a COPY to that vreg destination, build an + // IMPLICIT_DEF instruction instead. + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), DestReg); + break; + } + unsigned SrcReg; if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal)) SrcReg = R->getReg(); else SrcReg = getVR(SrcVal, VRBaseMap); - unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); if (SrcReg == DestReg) // Coalesced away the copy? Ignore. break; diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index 42f7846fe7c..cfe99dd977b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -42,11 +42,6 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap); - /// getDstOfCopyToRegUse - If the only use of the specified result number of - /// node is a CopyToReg, return its destination register. Return 0 otherwise. 
- unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, - unsigned ResNo) const; - void CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll index 5b25271ce17..7286d9785ed 100644 --- a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll +++ b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll @@ -37,3 +37,22 @@ bb6: ; preds = %bb4, %bb3 declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { nounwind readnone } + +; Make sure this won't crash. +; SI-LABEL: {{^}}vcopy_i1_undef +; SI: v_cndmask_b32_e64 +; SI: v_cndmask_b32_e64 +define <2 x float> @vcopy_i1_undef(<2 x float> addrspace(1)* %p) { +entry: + br i1 undef, label %exit, label %false + +false: + %x = load <2 x float>, <2 x float> addrspace(1)* %p + %cmp = fcmp one <2 x float> %x, zeroinitializer + br label %exit + +exit: + %c = phi <2 x i1> [ undef, %entry ], [ %cmp, %false ] + %ret = select <2 x i1> %c, <2 x float> <float 2.0, float 2.0>, <2 x float> <float 4.0, float 4.0> + ret <2 x float> %ret +} diff --git a/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll b/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll new file mode 100644 index 00000000000..fb540c3a6ba --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=amdgcn -stop-after=amdgpu-isel -verify-machineinstrs -o - %s | FileCheck %s + +; CHECK-LABEL: vcopy_i1_undef +; CHECK: IMPLICIT_DEF +; CHECK-NOT: COPY +; CHECK: IMPLICIT_DEF +; CHECK-NOT: COPY +; CHECK: .false: +define <2 x float> @vcopy_i1_undef(<2 x float> addrspace(1)* %p) { +entry: + br i1 undef, label %exit, label %false + +false: + %x = load <2 x float>, <2 x float> addrspace(1)* %p + %cmp = fcmp one <2 x float> %x, zeroinitializer + br label %exit + +exit: + %c = phi <2 x i1> [ undef, %entry ], [ %cmp, %false ] + %ret = select <2 x i1> %c, <2 x float> <float 2.0, float 2.0>, <2 x float> <float 4.0, float 4.0> + ret <2 x float> %ret +} |