summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2019-01-08 23:10:47 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2019-01-08 23:10:47 +0000
commit2c807410fd37942b5b866eecfc52d7f0126db99f (patch)
tree786dd2261e6cdf506682e6deb818fb537ef9e4f0
parent0e3299dc60f038ab6b7224206adb27aa0582865d (diff)
downloadbcm5719-llvm-2c807410fd37942b5b866eecfc52d7f0126db99f.tar.gz
bcm5719-llvm-2c807410fd37942b5b866eecfc52d7f0126db99f.zip
RegisterCoalescer: Defer clearing implicit_def lanes
We can't go back and recover the lanes if it turns out the implicit_def really can't be erased. Assume all lanes are valid if an unresolved conflict is encountered. There aren't any tests where this seems to matter either way, but this seems like a safer option. Fixes bug 39602 llvm-svn: 350676
-rw-r--r--llvm/lib/CodeGen/RegisterCoalescer.cpp49
-rw-r--r--llvm/test/CodeGen/AMDGPU/regcoalesce-keep-valid-lanes-implicit-def-bug39602.mir57
2 files changed, 90 insertions, 16 deletions
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 2c1bc6df37e..9a689e9a17c 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -2508,8 +2508,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// We normally expect IMPLICIT_DEF values to be live only until the end
// of their block. If the value is really live longer and gets pruned in
// another block, this flag is cleared again.
+ //
+ // Clearing the valid lanes is deferred until it is sure this can be
+ // erased.
V.ErasableImplicitDef = true;
- V.ValidLanes &= ~V.WriteLanes;
}
}
}
@@ -2564,20 +2566,25 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
Other.computeAssignment(V.OtherVNI->id, *this);
Val &OtherV = Other.Vals[V.OtherVNI->id];
- // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
- // This shouldn't normally happen, but ProcessImplicitDefs can leave such
- // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
- // technically.
- //
- // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
- // to erase the IMPLICIT_DEF instruction.
- if (OtherV.ErasableImplicitDef && DefMI &&
- DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
- LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
- << " extends into "
- << printMBBReference(*DefMI->getParent())
- << ", keeping it.\n");
- OtherV.ErasableImplicitDef = false;
+ if (OtherV.ErasableImplicitDef) {
+ // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
+ // This shouldn't normally happen, but ProcessImplicitDefs can leave such
+ // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
+ // technically.
+ //
+ // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
+ // to erase the IMPLICIT_DEF instruction.
+ if (DefMI &&
+ DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
+ LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " extends into "
+ << printMBBReference(*DefMI->getParent())
+ << ", keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
+ } else {
+ // We deferred clearing these lanes in case we needed to save them
+ OtherV.ValidLanes &= ~OtherV.WriteLanes;
+ }
}
// Allow overlapping PHI values. Any real interference would show up in a
@@ -2701,8 +2708,18 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
Val &OtherV = Other.Vals[V.OtherVNI->id];
// We cannot erase an IMPLICIT_DEF if we don't have valid values for all
// its lanes.
- if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness)
+ if (OtherV.ErasableImplicitDef &&
+ TrackSubRegLiveness &&
+ (OtherV.WriteLanes & ~V.ValidLanes).any()) {
+ LLVM_DEBUG(dbgs() << "Cannot erase implicit_def with missing values\n");
+
OtherV.ErasableImplicitDef = false;
+ // The valid lanes written by the implicit_def were speculatively cleared
+ // before, so make this more conservative. It may be better to track this,
+ // I haven't found a testcase where it matters.
+ OtherV.ValidLanes = LaneBitmask::getAll();
+ }
+
OtherV.Pruned = true;
LLVM_FALLTHROUGH;
}
diff --git a/llvm/test/CodeGen/AMDGPU/regcoalesce-keep-valid-lanes-implicit-def-bug39602.mir b/llvm/test/CodeGen/AMDGPU/regcoalesce-keep-valid-lanes-implicit-def-bug39602.mir
new file mode 100644
index 00000000000..f7a9915a198
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/regcoalesce-keep-valid-lanes-implicit-def-bug39602.mir
@@ -0,0 +1,57 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-coalescing -run-pass=simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck %s
+
+# Bug 39602: Avoid "Couldn't join subrange" error when clearing valid
+# lanes on an implicit_def that later cannot be erased.
+
+---
+name: lost_valid_lanes_maybe_erasable_implicit_def
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: lost_valid_lanes_maybe_erasable_implicit_def
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: undef %0.sub1:sreg_64 = IMPLICIT_DEF
+ ; CHECK: bb.1:
+ ; CHECK: %0.sub0:sreg_64 = S_MOV_B32 0
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY %0
+ ; CHECK: dead %0.sub1:sreg_64 = COPY %0.sub0
+ ; CHECK: S_ENDPGM implicit [[COPY]].sub1
+ bb.0:
+ successors: %bb.1
+ undef %0.sub1:sreg_64 = IMPLICIT_DEF
+
+ bb.1:
+ %0.sub0:sreg_64 = S_MOV_B32 0
+ %1:sreg_64 = COPY %0:sreg_64
+ dead %0.sub1:sreg_64 = COPY %0.sub0:sreg_64
+ S_ENDPGM implicit %1.sub1:sreg_64
+
+...
+---
+# Same as previous, except with a real value instead of
+# IMPLICIT_DEF. These should both be handled the same way.
+
+name: lost_valid_lanes_real_value
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: lost_valid_lanes_real_value
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: undef %0.sub1:sreg_64 = S_MOV_B32 -1
+ ; CHECK: bb.1:
+ ; CHECK: %0.sub0:sreg_64 = S_MOV_B32 0
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY %0
+ ; CHECK: dead %0.sub1:sreg_64 = COPY %0.sub0
+ ; CHECK: S_ENDPGM implicit [[COPY]].sub1
+ bb.0:
+ successors: %bb.1
+ undef %0.sub1:sreg_64 = S_MOV_B32 -1
+
+ bb.1:
+ %0.sub0:sreg_64 = S_MOV_B32 0
+ %1:sreg_64 = COPY %0:sreg_64
+ dead %0.sub1:sreg_64 = COPY %0.sub0:sreg_64
+ S_ENDPGM implicit %1.sub1:sreg_64
+
+...
OpenPOWER on IntegriCloud