diff options
-rw-r--r-- | llvm/lib/CodeGen/RegisterCoalescer.cpp | 72 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir | 215 |
2 files changed, 19 insertions, 268 deletions
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index ba83e5f0cd1..db5258e6270 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -2085,24 +2085,9 @@ class JoinVals { LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; /// Find the ultimate value that VNI was copied from. - std::tuple<const VNInfo*,unsigned, bool> - followCopyChain(const VNInfo *VNI, unsigned OtherReg) const; - - /// Determine whether Val0 for Reg and Val1 for Other.Reg are identical. - /// The first element of the returned pair is true if they are, false - /// otherwise. The second element is true if one value is defined - /// directly via the other, e.g.: - /// %reg0 = ... - /// %regx = COPY %reg0 ;; reg1 defined via reg0 - /// %reg1 = COPY %regx ;; val0 == val1 - /// -> { true, true } - /// vs - /// %reg0 = COPY %regy - /// %regx = COPY %regy - /// $reg1 = COPY %regx ;; val0 == val1 - /// -> { true, false } - std::pair<bool,bool> valuesIdentical(VNInfo *Val0, VNInfo *Val1, - const JoinVals &Other) const; + std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const; + + bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const; /// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. /// Return a conflict resolution when possible, but leave the hard cases as @@ -2218,22 +2203,19 @@ LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) return L; } -std::tuple<const VNInfo*, unsigned, bool> JoinVals::followCopyChain( - const VNInfo *VNI, unsigned OtherReg) const { +std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( + const VNInfo *VNI) const { unsigned Reg = this->Reg; - bool UsedOtherReg = false; while (!VNI->isPHIDef()) { SlotIndex Def = VNI->def; MachineInstr *MI = Indexes->getInstructionFromIndex(Def); assert(MI && "No defining instruction"); if (!MI->isFullCopy()) - return std::make_tuple(VNI, Reg, UsedOtherReg); + return std::make_pair(VNI, Reg); unsigned SrcReg = MI->getOperand(1).getReg(); - if (SrcReg == OtherReg) - UsedOtherReg = true; if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) - return std::make_tuple(VNI, Reg, UsedOtherReg); + return std::make_pair(VNI, Reg); const LiveInterval &LI = LIS->getInterval(SrcReg); const VNInfo *ValueIn; @@ -2259,28 +2241,26 @@ std::tuple<const VNInfo*, unsigned, bool> JoinVals::followCopyChain( VNI = ValueIn; Reg = SrcReg; } - return std::make_tuple(VNI, Reg, UsedOtherReg); + return std::make_pair(VNI, Reg); } -std::pair<bool,bool> JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, - const JoinVals &Other) const { +bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, + const JoinVals &Other) const { const VNInfo *Orig0; unsigned Reg0; - bool Other0; - std::tie(Orig0, Reg0, Other0) = followCopyChain(Value0, Other.Reg); - if (Orig0 == Value1 && Reg0 == Other.Reg) - return std::make_pair(true, Other0); + std::tie(Orig0, Reg0) = followCopyChain(Value0); + if (Orig0 == Value1) + return true; const VNInfo *Orig1; unsigned Reg1; - bool Other1; - std::tie(Orig1, Reg1, Other1) = Other.followCopyChain(Value1, Reg); + std::tie(Orig1, Reg1) = Other.followCopyChain(Value1); // The values are equal if they are defined at the same place and use the // same register. Note that we cannot compare VNInfos directly as some of // them might be from a copy created in mergeSubRangeInto() while the other // is from the original LiveInterval. - return std::make_pair(Orig0->def == Orig1->def && Reg0 == Reg1, Other1); + return Orig0->def == Orig1->def && Reg0 == Reg1; } JoinVals::ConflictResolution @@ -2450,22 +2430,9 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // %other = COPY %ext // %this = COPY %ext <-- Erase this copy // - // One case to be careful about is a case where the travesal of the - // COPY chain for one register encounters a use of the other register, - // e.g. - // 10 %other = COPY ... - // 20 %x = COPY %other - // 30 %this = COPY %x ;; assume that liveness of %this extends to the - // ;; end of the block (then back to the entry phi) - // Coalescing %this and %other can force a gap in the live range between - // 20r and 30r, and if the COPY at 30 is erased, its liveness will no longer - // extend to the end of the block (an IMPLICIT_DEF may incorrectly be added - // later on to create a definition that is live on exit). - if (DefMI->isFullCopy() && !CP.isPartial()) { - std::pair<bool,bool> P = valuesIdentical(VNI, V.OtherVNI, Other); - if (P.first) - return P.second ? CR_Merge : CR_Erase; - } + if (DefMI->isFullCopy() && !CP.isPartial() + && valuesIdentical(VNI, V.OtherVNI, Other)) + return CR_Erase; // If the lanes written by this instruction were all undef in OtherVNI, it is // still safe to join the live ranges. This can't be done with a simple value @@ -2990,8 +2957,7 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); - LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask) << ' ' - << LRange << "\n"); + LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); if (EndPoints.empty()) return; diff --git a/llvm/test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir b/llvm/test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir deleted file mode 100644 index 3ceffc23f91..00000000000 --- a/llvm/test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir +++ /dev/null @@ -1,215 +0,0 @@ -# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -run-pass=simple-register-coalescing,rename-independent-subregs %s -o - | FileCheck -check-prefix=GCN %s - -# This test is for a bug where the following happens: -# -# Inside the loop, %29.sub2 is used in a V_LSHLREV whose result is then used -# in an LDS read. %29 is a 128 bit value that is linked by copies to -# %45 (from phi elimination), %28 (the value in the loop pre-header), -# %31 (defined and subreg-modified in the loop, and used after the loop) -# and %30: -# -# %45:vreg_128 = COPY killed %28 -# bb.39: -# %29:vreg_128 = COPY killed %45 -# %39:vgpr_32 = V_LSHLREV_B32_e32 2, %29.sub2, implicit $exec -# %31:vreg_128 = COPY killed %29 -# %31.sub1:vreg_128 = COPY %34 -# %30:vreg_128 = COPY %31 -# %45:vreg_128 = COPY killed %30 -# S_CBRANCH_EXECNZ %bb.39, implicit $exec -# S_BRANCH %bb.40 -# bb.40: -# undef %8615.sub0:vreg_128 = COPY killed %31.sub0 -# -# So this coalesces together into a single 128 bit value whose sub1 is modified -# in the loop, but the sub2 used in the V_LSHLREV is not modified in the loop. -# -# The bug is that the coalesced value has a L00000004 subrange (for sub2) that -# says that it is not live up to the end of the loop block. The symptom is that -# Rename Independent Subregs separates sub2 into its own register, and it is -# not live round the loop, so that pass adds an IMPLICIT_DEF for it just before -# the loop backedge. - -# GCN: bb.1 (%ir-block.6): -# GCN: V_LSHLREV_B32_e32 2, [[val:%[0-9][0-9]*]].sub2 -# GCN-NOT: [[val]]:vreg_128 = IMPLICIT_DEF - ---- | - target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" - target triple = "amdgcn--amdpal" - - define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg, i32 inreg, i32 inreg, <3 x i32> inreg, i32 inreg, <3 x i32>) local_unnamed_addr #0 { - .entry: - br label %6 - - ; <label>:6: ; preds = %6, %.entry - %7 = bitcast i32 addrspace(3)* undef to <2 x i32> addrspace(3)* - %8 = bitcast i32 addrspace(3)* undef to <2 x i32> addrspace(3)* - %9 = bitcast i32 addrspace(3)* undef to <2 x i32> addrspace(3)* - %10 = bitcast i32 addrspace(3)* undef to <2 x i32> addrspace(3)* - %11 = bitcast i32 addrspace(3)* undef to <2 x i32> addrspace(3)* - %12 = bitcast i32 addrspace(3)* undef to <2 x i32> addrspace(3)* - br i1 undef, label %13, label %6 - - ; <label>:13: ; preds = %6 - ret void - } - - attributes #0 = { "target-cpu"="gfx803" } - -... ---- -name: _amdgpu_cs_main -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -registers: - - { id: 0, class: sreg_64, preferred-register: '' } - - { id: 1, class: sreg_64, preferred-register: '' } - - { id: 2, class: sreg_64, preferred-register: '' } - - { id: 3, class: sgpr_32, preferred-register: '' } - - { id: 4, class: sgpr_32, preferred-register: '' } - - { id: 5, class: sgpr_32, preferred-register: '' } - - { id: 6, class: sgpr_32, preferred-register: '' } - - { id: 7, class: sreg_32_xm0, preferred-register: '' } - - { id: 8, class: sgpr_32, preferred-register: '' } - - { id: 9, class: sreg_32_xm0, preferred-register: '' } - - { id: 10, class: sreg_32_xm0, preferred-register: '' } - - { id: 11, class: sreg_32_xm0, preferred-register: '' } - - { id: 12, class: vreg_64, preferred-register: '' } - - { id: 13, class: vreg_64, preferred-register: '' } - - { id: 14, class: vreg_64, preferred-register: '' } - - { id: 15, class: vreg_64, preferred-register: '' } - - { id: 16, class: vreg_64, preferred-register: '' } - - { id: 17, class: vreg_64, preferred-register: '' } - - { id: 18, class: sreg_64, preferred-register: '$vcc' } - - { id: 19, class: vreg_128, preferred-register: '' } - - { id: 20, class: vreg_128, preferred-register: '' } - - { id: 21, class: vreg_128, preferred-register: '' } - - { id: 22, class: vreg_128, preferred-register: '' } - - { id: 23, class: vreg_128, preferred-register: '' } - - { id: 24, class: vreg_128, preferred-register: '' } - - { id: 25, class: vreg_128, preferred-register: '' } - - { id: 26, class: vreg_128, preferred-register: '' } - - { id: 27, class: vreg_128, preferred-register: '' } - - { id: 28, class: vreg_128, preferred-register: '' } - - { id: 29, class: vreg_128, preferred-register: '' } - - { id: 30, class: vreg_128, preferred-register: '' } - - { id: 31, class: vreg_128, preferred-register: '' } - - { id: 32, class: vreg_128, preferred-register: '' } - - { id: 33, class: vgpr_32, preferred-register: '' } - - { id: 34, class: sreg_32, preferred-register: '' } - - { id: 35, class: vreg_128, preferred-register: '' } - - { id: 36, class: vreg_128, preferred-register: '' } - - { id: 37, class: vreg_128, preferred-register: '' } - - { id: 38, class: vgpr_32, preferred-register: '' } - - { id: 39, class: vgpr_32, preferred-register: '' } - - { id: 40, class: vgpr_32, preferred-register: '' } - - { id: 41, class: vgpr_32, preferred-register: '' } - - { id: 42, class: vreg_128, preferred-register: '' } - - { id: 43, class: sreg_64, preferred-register: '' } - - { id: 44, class: vreg_128, preferred-register: '' } - - { id: 45, class: vreg_128, preferred-register: '' } -liveins: -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - stackProtector: '' - maxCallFrameSize: 4294967295 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: -stack: -constants: -body: | - bb.0..entry: - successors: %bb.1(0x80000000) - - %3:sgpr_32 = S_MOV_B32 0 - undef %19.sub1:vreg_128 = COPY undef %3 - dead %4:sgpr_32 = S_MOV_B32 1 - %20:vreg_128 = COPY killed %19 - %20.sub1:vreg_128 = COPY undef %4 - dead %5:sgpr_32 = S_MOV_B32 2 - %21:vreg_128 = COPY killed %20 - %21.sub1:vreg_128 = COPY undef %5 - dead %6:sgpr_32 = S_MOV_B32 3 - %22:vreg_128 = COPY killed %21 - %22.sub1:vreg_128 = COPY undef %6 - dead %7:sreg_32_xm0 = S_MOV_B32 4 - %23:vreg_128 = COPY killed %22 - %23.sub1:vreg_128 = COPY undef %7 - dead %8:sgpr_32 = S_MOV_B32 5 - %24:vreg_128 = COPY killed %23 - %24.sub1:vreg_128 = COPY undef %8 - dead %9:sreg_32_xm0 = S_MOV_B32 6 - %25:vreg_128 = COPY killed %24 - %25.sub1:vreg_128 = COPY undef %9 - dead %10:sreg_32_xm0 = S_MOV_B32 7 - %26:vreg_128 = COPY killed %25 - %26.sub1:vreg_128 = COPY undef %10 - %11:sreg_32_xm0 = S_MOV_B32 255 - %27:vreg_128 = COPY killed %26 - %27.sub1:vreg_128 = COPY %11 - %28:vreg_128 = COPY killed %27 - %28.sub2:vreg_128 = COPY killed %11 - %2:sreg_64 = S_MOV_B64 0 - %34:sreg_32 = S_MOV_B32 7 - %37:vreg_128 = COPY undef %42:vreg_128 - %43:sreg_64 = COPY killed %2 - %44:vreg_128 = COPY killed %37 - %45:vreg_128 = COPY killed %28 - - bb.1 (%ir-block.6): - successors: %bb.2(0x04000000), %bb.1(0x7c000000) - - %29:vreg_128 = COPY killed %45 - %36:vreg_128 = COPY killed %44 - %0:sreg_64 = COPY killed %43 - %39:vgpr_32 = V_LSHLREV_B32_e32 2, %29.sub2, implicit $exec - %41:vgpr_32 = V_ADD_I32_e32 1152, %39, implicit-def dead $vcc, implicit $exec - $m0 = S_MOV_B32 -1 - %12:vreg_64 = DS_READ2_B32 killed %41, 0, 1, 0, implicit $m0, implicit $exec :: (load 8 from %ir.7, align 4, addrspace 3) - %13:vreg_64 = DS_READ2_B32 %39, -112, -111, 0, implicit $m0, implicit $exec :: (load 8 from %ir.8, align 4, addrspace 3) - %14:vreg_64 = DS_READ2_B32 %39, 0, 1, 0, implicit $m0, implicit $exec :: (load 8 from %ir.9, align 4, addrspace 3) - %40:vgpr_32 = V_ADD_I32_e32 1160, %39, implicit-def dead $vcc, implicit $exec - %15:vreg_64 = DS_READ2_B32 killed %40, 0, 1, 0, implicit $m0, implicit $exec :: (load 8 from %ir.10, align 4, addrspace 3) - %16:vreg_64 = DS_READ2_B32 %39, -110, -109, 0, implicit $m0, implicit $exec :: (load 8 from %ir.11, align 4, addrspace 3) - %17:vreg_64 = DS_READ2_B32 %39, 2, 3, 0, implicit $m0, implicit $exec :: (load 8 from %ir.12, align 4, addrspace 3) - undef %35.sub1:vreg_128 = COPY undef %34 - %31:vreg_128 = COPY killed %29 - %31.sub1:vreg_128 = COPY %34 - %38:vgpr_32 = V_ADD_I32_e32 1, %36.sub0, implicit-def dead $vcc, implicit $exec - %18:sreg_64 = V_CMP_LT_I32_e64 5, %38, implicit $exec - %1:sreg_64 = S_OR_B64 killed %18, killed %0, implicit-def $scc - %30:vreg_128 = COPY %31 - %43:sreg_64 = COPY %1 - %44:vreg_128 = COPY %35 - %45:vreg_128 = COPY killed %30 - $exec = S_ANDN2_B64_term $exec, %1 - S_CBRANCH_EXECNZ %bb.1, implicit $exec - S_BRANCH %bb.2 - - bb.2 (%ir-block.13): - $exec = S_OR_B64 $exec, killed %1, implicit-def $scc - %33:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - undef %32.sub0:vreg_128 = COPY killed %31.sub0 - %32.sub2:vreg_128 = COPY %33 - S_ENDPGM - -... |