summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Liao <michael.hliao@gmail.com>2019-05-27 18:26:29 +0000
committerMichael Liao <michael.hliao@gmail.com>2019-05-27 18:26:29 +0000
commit9c70c574b4fec75f4c8a530891e6e412e7ad77be (patch)
tree061032339bf69b8552f61964d75564ca7ee46f46
parentf4040a0dd81b1bb4d1a4704492d1642c09190f56 (diff)
downloadbcm5719-llvm-9c70c574b4fec75f4c8a530891e6e412e7ad77be.tar.gz
bcm5719-llvm-9c70c574b4fec75f4c8a530891e6e412e7ad77be.zip
[SelectionDAG] Enhance the simplification of `copyto` from `implicit-def`.
Summary: - The current implementation simplifies the case where the source of `copyto` is `implicit-def`ed. However, it only works when that `implicit-def` has a single use, since the detection starts from the `implicit-def` and cannot determine which destination vreg should be used if there are multiple uses. - This patch moves that detection to the point where `copyto` is being emitted. If that `copyto`'s source is defined by an `implicit-def`, it simplifies it. Hence, it works even when that `implicit-def` has multiple uses. - Apart from simplifying the internal IR, it won't improve the quality of code generation. However, it helps to detect `implicit-def` in a straightforward manner in some passes, such as `si-i1-copies`. A test case is added. Reviewers: sunfish, nhaehnle Subscribers: jvesely, hiraditya, asbirlea, llvm-commits, yaxunl Tags: #llvm Differential Revision: https://reviews.llvm.org/D62342 llvm-svn: 361777
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp40
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h5
-rw-r--r--llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll19
-rw-r--r--llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll22
4 files changed, 55 insertions, 31 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 4b78d1bb6b1..8533a94c48a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -186,24 +186,6 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
assert(isNew && "Node emitted out of order - early");
}
-/// getDstOfCopyToRegUse - If the only use of the specified result number of
-/// node is a CopyToReg, return its destination register. Return 0 otherwise.
-unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
- unsigned ResNo) const {
- if (!Node->hasOneUse())
- return 0;
-
- SDNode *User = *Node->use_begin();
- if (User->getOpcode() == ISD::CopyToReg &&
- User->getOperand(2).getNode() == Node &&
- User->getOperand(2).getResNo() == ResNo) {
- unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- return Reg;
- }
- return 0;
-}
-
void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
MachineInstrBuilder &MIB,
const MCInstrDesc &II,
@@ -286,14 +268,11 @@ unsigned InstrEmitter::getVR(SDValue Op,
if (Op.isMachineOpcode() &&
Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
// Add an IMPLICIT_DEF instruction before every use.
- unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
// IMPLICIT_DEF can produce any type of result so its MCInstrDesc
// does not include operand register class info.
- if (!VReg) {
- const TargetRegisterClass *RC = TLI->getRegClassFor(
- Op.getSimpleValueType(), Op.getNode()->isDivergent());
- VReg = MRI->createVirtualRegister(RC);
- }
+ const TargetRegisterClass *RC = TLI->getRegClassFor(
+ Op.getSimpleValueType(), Op.getNode()->isDivergent());
+ unsigned VReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
return VReg;
@@ -1011,14 +990,23 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case ISD::TokenFactor: // fall thru
break;
case ISD::CopyToReg: {
- unsigned SrcReg;
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
SDValue SrcVal = Node->getOperand(2);
+ if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
+ SrcVal.isMachineOpcode() &&
+ SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Instead of building a COPY to that vreg destination, build an
+ // IMPLICIT_DEF instruction that defines it directly.
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ break;
+ }
+ unsigned SrcReg;
if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
SrcReg = R->getReg();
else
SrcReg = getVR(SrcVal, VRBaseMap);
- unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 42f7846fe7c..cfe99dd977b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -42,11 +42,6 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
unsigned SrcReg,
DenseMap<SDValue, unsigned> &VRBaseMap);
- /// getDstOfCopyToRegUse - If the only use of the specified result number of
- /// node is a CopyToReg, return its destination register. Return 0 otherwise.
- unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
- unsigned ResNo) const;
-
void CreateVirtualRegisters(SDNode *Node,
MachineInstrBuilder &MIB,
const MCInstrDesc &II,
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
index 5b25271ce17..7286d9785ed 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
@@ -37,3 +37,22 @@ bb6: ; preds = %bb4, %bb3
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }
+
+; Make sure this won't crash.
+; SI-LABEL: {{^}}vcopy_i1_undef
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+define <2 x float> @vcopy_i1_undef(<2 x float> addrspace(1)* %p) {
+entry:
+ br i1 undef, label %exit, label %false
+
+false:
+ %x = load <2 x float>, <2 x float> addrspace(1)* %p
+ %cmp = fcmp one <2 x float> %x, zeroinitializer
+ br label %exit
+
+exit:
+ %c = phi <2 x i1> [ undef, %entry ], [ %cmp, %false ]
+ %ret = select <2 x i1> %c, <2 x float> <float 2.0, float 2.0>, <2 x float> <float 4.0, float 4.0>
+ ret <2 x float> %ret
+}
diff --git a/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll b/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll
new file mode 100644
index 00000000000..fb540c3a6ba
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=amdgcn -stop-after=amdgpu-isel -verify-machineinstrs -o - %s | FileCheck %s
+
+; CHECK-LABEL: vcopy_i1_undef
+; CHECK: IMPLICIT_DEF
+; CHECK-NOT: COPY
+; CHECK: IMPLICIT_DEF
+; CHECK-NOT: COPY
+; CHECK: .false:
+define <2 x float> @vcopy_i1_undef(<2 x float> addrspace(1)* %p) {
+entry:
+ br i1 undef, label %exit, label %false
+
+false:
+ %x = load <2 x float>, <2 x float> addrspace(1)* %p
+ %cmp = fcmp one <2 x float> %x, zeroinitializer
+ br label %exit
+
+exit:
+ %c = phi <2 x i1> [ undef, %entry ], [ %cmp, %false ]
+ %ret = select <2 x i1> %c, <2 x float> <float 2.0, float 2.0>, <2 x float> <float 4.0, float 4.0>
+ ret <2 x float> %ret
+}
OpenPOWER on IntegriCloud