 llvm/lib/CodeGen/RegisterCoalescer.cpp  | 171 ++++++++++++++
 llvm/test/CodeGen/X86/pre-coalesce-2.ll | 281 ++++++++++++++++++++
 llvm/test/CodeGen/X86/pre-coalesce.ll   |  51 +++++
 llvm/test/CodeGen/X86/pre-coalesce.mir  | 122 ++++++++++
 4 files changed, 625 insertions(+), 0 deletions(-)
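
For context before the diff itself: the shape of source code that produces the partially redundant copy targeted by this patch can be sketched in C. This is a hypothetical reconstruction of the pre-coalesce.ll test added below (the globals `a` and `b` and the function name `foo` mirror that test); it is illustrative only and not part of the patch:

    /* Loop whose carried value `r` is kept in one register and stored to
     * the global on every iteration.  Before this patch the coalescer left
     * a register-to-register copy of `r` inside the hot loop body; the
     * patch hoists that copy to the colder preheader. */
    char *b;
    int a;

    int foo(void) {
      char *p = b;                /* %t0 in the IR                  */
      int r = a;                  /* %t2, loop-carried as %t3/%add3 */
      for (char c = *p; c != 0; c = *p)
        a = r = r * 33 + c;
      return r;
    }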
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 57d5b5013d8..9334764d54b 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
@@ -189,6 +190,9 @@ namespace {
     /// This returns true if an interval was modified.
     bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
 
+    /// Remove a partially redundant copy by moving it to a colder predecessor.
+    bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
+
     /// If the source of a copy is defined by a
     /// trivial computation, replace the copy by rematerialize the definition.
     bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
@@ -861,6 +865,167 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
   return true;
 }
 
+/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
+/// predecessor of BB2, and if B is not redefined on the way from A = B
+/// in BB0 to B = A in BB2, then B = A in BB2 is partially redundant
+/// whenever execution goes through the path from BB0 to BB2. We may move
+/// B = A to the predecessor without such a reversed copy.
+/// So we will transform the program from:
+///   BB0:
+///      A = B;    BB1:
+///       ...         ...
+///     /     \      /
+///             BB2:
+///               ...
+///               B = A;
+///
+/// to:
+///
+///   BB0:          BB1:
+///      A = B;        ...
+///       ...          B = A;
+///     /     \      /
+///             BB2:
+///               ...
+///
+/// A special case is when BB0 and BB2 are the same BB which is the only
+/// BB in a loop:
+///   BB1:
+///        ...
+///   BB0/BB2:  ----
+///        B = A;   |
+///        ...      |
+///        A = B;   |
+///          |-------
+///          |
+/// We may hoist B = A from BB0/BB2 to BB1.
+///
+/// The major preconditions for correctness of this partial-redundancy
+/// removal are:
+/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
+///    the PHI is defined by the reversed copy A = B in BB0.
+/// 2. No B is referenced from the start of BB2 to B = A.
+/// 3. No B is defined from A = B to the end of BB0.
+/// 4. BB1 has only one successor.
+///
+/// 2 and 4 implicitly ensure B is not live at the end of BB1.
+/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
+/// colder place, which not only prevents an endless loop but also makes
+/// the movement of the copy beneficial.
+bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
+                                                MachineInstr &CopyMI) {
+  assert(!CP.isPhys());
+  if (!CopyMI.isFullCopy())
+    return false;
+
+  MachineBasicBlock &MBB = *CopyMI.getParent();
+  if (MBB.isEHPad())
+    return false;
+
+  if (MBB.pred_size() != 2)
+    return false;
+
+  LiveInterval &IntA =
+      LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+  LiveInterval &IntB =
+      LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+  // A is defined by PHI at the entry of MBB.
+  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
+  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
+  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
+  if (!AValNo->isPHIDef())
+    return false;
+
+  // No B is referenced before CopyMI in MBB.
+  if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
+    return false;
+
+  // MBB has two predecessors: one contains A = B, so no copy will be inserted
+  // for it. The other one will have a copy moved from MBB.
+  bool FoundReverseCopy = false;
+  MachineBasicBlock *CopyLeftBB = nullptr;
+  for (MachineBasicBlock *Pred : MBB.predecessors()) {
+    VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
+    MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
+    if (!DefMI || !DefMI->isFullCopy()) {
+      CopyLeftBB = Pred;
+      continue;
+    }
+    // Check that DefMI is a reverse copy and that it is in BB Pred.
+    if (DefMI->getOperand(0).getReg() != IntA.reg ||
+        DefMI->getOperand(1).getReg() != IntB.reg ||
+        DefMI->getParent() != Pred) {
+      CopyLeftBB = Pred;
+      continue;
+    }
+    // If there is any other def of B after DefMI and before the end of Pred,
+    // we need to keep the copy of B = A at the end of Pred if we remove
+    // B = A from MBB.
+    bool ValB_Changed = false;
+    for (auto VNI : IntB.valnos) {
+      if (VNI->isUnused())
+        continue;
+      if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
+        ValB_Changed = true;
+        break;
+      }
+    }
+    if (ValB_Changed) {
+      CopyLeftBB = Pred;
+      continue;
+    }
+    FoundReverseCopy = true;
+  }
+
+  // If no reverse copy is found in predecessors, nothing to do.
+  if (!FoundReverseCopy)
+    return false;
+
+  // If CopyLeftBB is nullptr, it means every predecessor of MBB contains a
+  // reverse copy, so CopyMI can be removed trivially once IntA/IntB are
+  // updated. If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB
+  // and update IntA/IntB.
+  //
+  // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single successor so
+  // that MBB is hotter than CopyLeftBB.
+  if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
+    return false;
+
+  // Now it is OK to move the copy.
+  if (CopyLeftBB) {
+    DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to BB#"
+                 << CopyLeftBB->getNumber() << '\t' << CopyMI);
+
+    // Insert a new copy into CopyLeftBB.
+    auto InsPos = CopyLeftBB->getFirstTerminator();
+    MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
+                                      TII->get(TargetOpcode::COPY), IntB.reg)
+                                  .addReg(IntA.reg);
+    SlotIndex NewCopyIdx =
+        LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
+    VNInfo *VNI = IntB.getNextValue(NewCopyIdx, LIS->getVNInfoAllocator());
+    IntB.createDeadDef(VNI);
+  } else {
+    DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#"
+                 << MBB.getNumber() << '\t' << CopyMI);
+  }
+
+  // Remove CopyMI.
+  SmallVector<SlotIndex, 8> EndPoints;
+  VNInfo *BValNo = IntB.Query(CopyIdx.getRegSlot()).valueOutOrDead();
+  LIS->pruneValue(IntB, CopyIdx.getRegSlot(), &EndPoints);
+  BValNo->markUnused();
+  LIS->RemoveMachineInstrFromMaps(CopyMI);
+  CopyMI.eraseFromParent();
+
+  // Extend IntB to the EndPoints of its original live interval.
+  LIS->extendToIndices(IntB, EndPoints);
+
+  shrinkToUses(&IntA);
+  return true;
+}
+
 /// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
 /// defining a subregister.
 static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
@@ -1486,6 +1651,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
     }
   }
 
+  // Try and see if we can partially eliminate the copy by moving the copy to
+  // its predecessor.
+  if (!CP.isPartial() && !CP.isPhys())
+    if (removePartialRedundancy(CP, *CopyMI))
+      return true;
+
   // Otherwise, we are unable to join the intervals.
   DEBUG(dbgs() << "\tInterference!\n");
   Again = true;  // May be possible to coalesce later.
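
To make the BB0/BB1/BB2 diagram in the new comment concrete, here is a hedged C-level sketch of the diamond case (hypothetical code, not from the patch; copies are invisible at source level, so the comments map the blocks and virtual registers onto the diagram):

    extern int use(int);

    int join_copy(int a, int b, int cond) {
      if (cond) {
        a = b;          /* BB0: the reversed copy A = B             */
      } else {
        a = use(a);     /* BB1: colder arm with a single successor  */
      }
      /* BB2: `a` is a PHI of the two arms; `b` is untouched above. */
      b = a;            /* B = A: redundant whenever BB0 executed;  */
      return use(b);    /* the pass may emit the copy in BB1 only.  */
    }

Whether a given compilation actually materializes this pair of copies depends on register allocation; the pre-coalesce.mir test below pins down the exact machine-level pattern.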
diff --git a/llvm/test/CodeGen/X86/pre-coalesce-2.ll b/llvm/test/CodeGen/X86/pre-coalesce-2.ll
new file mode 100644
index 00000000000..90fcd1875d4
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pre-coalesce-2.ll
@@ -0,0 +1,281 @@
+; RUN: llc -regalloc=greedy -verify-coalescing -mtriple=x86_64-unknown-linux-gnu < %s
+; Check that the live range is updated properly after register coalescing.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@.str = internal unnamed_addr constant { [17 x i8], [47 x i8] } { [17 x i8] c"0123456789ABCDEF\00", [47 x i8] zeroinitializer }, align 32
+@b = common local_unnamed_addr global i32 0, align 4
+@a = common local_unnamed_addr global i32* null, align 8
+@__sancov_gen_cov = private global [9 x i32] zeroinitializer
+
+; Function Attrs: nounwind sanitize_address
+define void @fn2(i8* %p1) local_unnamed_addr #0 {
+entry:
+  %0 = load atomic i32, i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 4) to i32*) monotonic, align 4
+  %1 = icmp sge i32 0, %0
+  br i1 %1, label %2, label %3
+
+; <label>:2:                                      ; preds = %entry
+  call void @__sanitizer_cov(i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 4) to i32*))
+  call void asm sideeffect "", ""()
+  br label %3
+
+; <label>:3:                                      ; preds = %entry, %2
+  br label %while.cond.outer
+
+while.cond.outer:                                 ; preds = %75, %3
+  %e.0.ph = phi i8* [ %e.058, %75 ], [ undef, %3 ]
+  %c.0.ph = phi i32* [ %c.059, %75 ], [ undef, %3 ]
+  %p1.addr.0.ph = phi i8* [ %incdec.ptr60, %75 ], [ %p1, %3 ]
+  %4 = ptrtoint i8* %p1.addr.0.ph to i64
+  %5 = lshr i64 %4, 3
+  %6 = add i64 %5, 2147450880
+  %7 = inttoptr i64 %6 to i8*
+  %8 = load i8, i8* %7
+  %9 = icmp ne i8 %8, 0
+  br i1 %9, label %10, label %15
+
+; <label>:10:                                     ; preds = %while.cond.outer
+  %11 = and i64 %4, 7
+  %12 = trunc i64 %11 to i8
+  %13 = icmp sge i8 %12, %8
+  br i1 %13, label %14, label %15
+
+; <label>:14:                                     ; preds = %10
+  call void @__asan_report_load1(i64 %4)
+  call void asm sideeffect "", ""()
+  unreachable
+
+; <label>:15:                                     ; preds = %10, %while.cond.outer
+  %16 = load i8, i8* %p1.addr.0.ph, align 1
+  call void @__sanitizer_cov_trace_cmp1(i8 %16, i8 0)
+  %cmp57 = icmp eq i8 %16, 0
+  br i1 %cmp57, label %while.cond.outer.enoent.loopexit96_crit_edge, label %while.body.preheader
+
+while.cond.outer.enoent.loopexit96_crit_edge:     ; preds = %15
+  %17 = load atomic i32, i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 8) to i32*) monotonic, align 4
+  %18 = icmp sge i32 0, %17
+  br i1 %18, label %19, label %20
+
+; <label>:19:                                     ; preds = %while.cond.outer.enoent.loopexit96_crit_edge
+  call void @__sanitizer_cov(i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 8) to i32*))
+  call void asm sideeffect "", ""()
+  br label %20
+
+; <label>:20:                                     ; preds = %while.cond.outer.enoent.loopexit96_crit_edge, %19
+  br label %enoent.loopexit96
+
+while.body.preheader:                             ; preds = %15
+  br label %while.body
+
+while.body:                                       ; preds = %56, %while.body.preheader
+  %21 = phi i8 [ %52, %56 ], [ %16, %while.body.preheader ]
+  %p1.addr.0.ph.pn = phi i8* [ %incdec.ptr60, %56 ], [ %p1.addr.0.ph, %while.body.preheader ]
+  %c.059 = phi i32* [ %incdec.ptr18, %56 ], [ %c.0.ph, %while.body.preheader ]
+  %e.058 = phi i8* [ %incdec.ptr60, %56 ], [ %e.0.ph, %while.body.preheader ]
+  %incdec.ptr60 = getelementptr inbounds i8, i8* %p1.addr.0.ph.pn, i64 1
+  %conv = sext i8 %21 to i32
+  %call = tail call i32 (i8*, i32, ...) bitcast (i32 (...)* @fn3 to i32 (i8*, i32, ...)*)(i8* getelementptr inbounds ({ [17 x i8], [47 x i8] }, { [17 x i8], [47 x i8] }* @.str, i32 0, i32 0, i64 0), i32 %conv) #2
+  call void @__sanitizer_cov_trace_cmp4(i32 %call, i32 0)
+  %tobool = icmp eq i32 %call, 0
+  br i1 %tobool, label %if.end5, label %cleanup
+
+if.end5:                                          ; preds = %while.body
+  call void @__sanitizer_cov_trace_cmp1(i8 %21, i8 58)
+  %cmp6 = icmp eq i8 %21, 58
+  br i1 %cmp6, label %if.end14, label %cleanup.thread40
+
+if.end14:                                         ; preds = %if.end5
+  %22 = load i8, i8* inttoptr (i64 add (i64 lshr (i64 ptrtoint (i32** @a to i64), i64 3), i64 2147450880) to i8*)
+  %23 = icmp ne i8 %22, 0
+  br i1 %23, label %24, label %25
+
+; <label>:24:                                     ; preds = %if.end14
+  call void @__asan_report_load8(i64 ptrtoint (i32** @a to i64))
+  call void asm sideeffect "", ""()
+  unreachable
+
+; <label>:25:                                     ; preds = %if.end14
+  %26 = load i32*, i32** @a, align 8
+  %tobool15 = icmp eq i32* %26, null
+  br i1 %tobool15, label %cleanup.thread39, label %cleanup23.loopexit
+
+cleanup.thread39:                                 ; preds = %25
+  %incdec.ptr18 = getelementptr inbounds i32, i32* %c.059, i64 1
+  %27 = ptrtoint i32* %c.059 to i64
+  %28 = lshr i64 %27, 3
+  %29 = add i64 %28, 2147450880
+  %30 = inttoptr i64 %29 to i8*
+  %31 = load i8, i8* %30
+  %32 = icmp ne i8 %31, 0
+  br i1 %32, label %33, label %39
+
+; <label>:33:                                     ; preds = %cleanup.thread39
+  %34 = and i64 %27, 7
+  %35 = add i64 %34, 3
+  %36 = trunc i64 %35 to i8
+  %37 = icmp sge i8 %36, %31
+  br i1 %37, label %38, label %39
+
+; <label>:38:                                     ; preds = %33
+  call void @__asan_report_store4(i64 %27)
+  call void asm sideeffect "", ""()
+  unreachable
+
+; <label>:39:                                     ; preds = %33, %cleanup.thread39
+  store i32 0, i32* %c.059, align 4
+  %40 = ptrtoint i8* %incdec.ptr60 to i64
+  %41 = lshr i64 %40, 3
+  %42 = add i64 %41, 2147450880
+  %43 = inttoptr i64 %42 to i8*
+  %44 = load i8, i8* %43
+  %45 = icmp ne i8 %44, 0
+  br i1 %45, label %46, label %51
+
+; <label>:46:                                     ; preds = %39
+  %47 = and i64 %40, 7
+  %48 = trunc i64 %47 to i8
+  %49 = icmp sge i8 %48, %44
+  br i1 %49, label %50, label %51
+
+; <label>:50:                                     ; preds = %46
+  call void @__asan_report_load1(i64 %40)
+  call void asm sideeffect "", ""()
+  unreachable
+
+; <label>:51:                                     ; preds = %46, %39
+  %52 = load i8, i8* %incdec.ptr60, align 1
+  call void @__sanitizer_cov_trace_cmp1(i8 %52, i8 0)
+  %cmp = icmp eq i8 %52, 0
+  br i1 %cmp, label %enoent.loopexit, label %cleanup.thread39.while.body_crit_edge
+
+cleanup.thread39.while.body_crit_edge:            ; preds = %51
+  %53 = load atomic i32, i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 12) to i32*) monotonic, align 4
+  %54 = icmp sge i32 0, %53
+  br i1 %54, label %55, label %56
+
+; <label>:55:                                     ; preds = %cleanup.thread39.while.body_crit_edge
+  call void @__sanitizer_cov(i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 12) to i32*))
+  call void asm sideeffect "", ""()
+  br label %56
+
+; <label>:56:                                     ; preds = %cleanup.thread39.while.body_crit_edge, %55
+  br label %while.body
+
+cleanup.thread40:                                 ; preds = %if.end5
+  %57 = load atomic i32, i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 16) to i32*) monotonic, align 4
+  %58 = icmp sge i32 0, %57
+  br i1 %58, label %59, label %60
+
+; <label>:59:                                     ; preds = %cleanup.thread40
+  call void @__sanitizer_cov(i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 16) to i32*))
+  call void asm sideeffect "", ""()
+  br label %60
+
+; <label>:60:                                     ; preds = %cleanup.thread40, %59
+  %call20 = tail call i32 (i8*, ...) bitcast (i32 (...)* @fn4 to i32 (i8*, ...)*)(i8* %e.058) #2
+  br label %enoent
+
+cleanup:                                          ; preds = %while.body
+  %61 = load i8, i8* inttoptr (i64 add (i64 lshr (i64 ptrtoint (i32* @b to i64), i64 3), i64 2147450880) to i8*)
+  %62 = icmp ne i8 %61, 0
+  br i1 %62, label %63, label %66
+
+; <label>:63:                                     ; preds = %cleanup
+  %64 = icmp sge i8 trunc (i64 add (i64 and (i64 ptrtoint (i32* @b to i64), i64 7), i64 3) to i8), %61
+  br i1 %64, label %65, label %66
+
+; <label>:65:                                     ; preds = %63
+  call void @__asan_report_load4(i64 ptrtoint (i32* @b to i64))
+  call void asm sideeffect "", ""()
+  unreachable
+
+; <label>:66:                                     ; preds = %63, %cleanup
+  %67 = load i32, i32* @b, align 4
+  call void @__sanitizer_cov_trace_cmp4(i32 %67, i32 0)
+  %tobool3 = icmp eq i32 %67, 0
+  br i1 %tobool3, label %cleanup.while.cond.outer_crit_edge, label %cleanup.enoent.loopexit96_crit_edge
+
+cleanup.enoent.loopexit96_crit_edge:              ; preds = %66
+  %68 = load atomic i32, i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 20) to i32*) monotonic, align 4
+  %69 = icmp sge i32 0, %68
+  br i1 %69, label %70, label %71
+
+; <label>:70:                                     ; preds = %cleanup.enoent.loopexit96_crit_edge
+  call void @__sanitizer_cov(i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 20) to i32*))
+  call void asm sideeffect "", ""()
+  br label %71
+
+; <label>:71:                                     ; preds = %cleanup.enoent.loopexit96_crit_edge, %70
+  br label %enoent.loopexit96
+
+cleanup.while.cond.outer_crit_edge:               ; preds = %66
+  %72 = load atomic i32, i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 24) to i32*) monotonic, align 4
+  %73 = icmp sge i32 0, %72
+  br i1 %73, label %74, label %75
+
+; <label>:74:                                     ; preds = %cleanup.while.cond.outer_crit_edge
+  call void @__sanitizer_cov(i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 24) to i32*))
+  call void asm sideeffect "", ""()
+  br label %75
+
+; <label>:75:                                     ; preds = %cleanup.while.cond.outer_crit_edge, %74
+  br label %while.cond.outer
+
+enoent.loopexit:                                  ; preds = %51
+  %76 = load atomic i32, i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 28) to i32*) monotonic, align 4
+  %77 = icmp sge i32 0, %76
+  br i1 %77, label %78, label %79
+
+; <label>:78:                                     ; preds = %enoent.loopexit
+  call void @__sanitizer_cov(i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 28) to i32*))
+  call void asm sideeffect "", ""()
+  br label %79
+
+; <label>:79:                                     ; preds = %enoent.loopexit, %78
+  br label %enoent
+
+enoent.loopexit96:                                ; preds = %71, %20
+  br label %enoent
+
+enoent:                                           ; preds = %enoent.loopexit96, %79, %60
+  %call22 = tail call i32* (...) @fn1() #2
+  br label %cleanup23
+
+cleanup23.loopexit:                               ; preds = %25
+  %80 = load atomic i32, i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 32) to i32*) monotonic, align 4
+  %81 = icmp sge i32 0, %80
+  br i1 %81, label %82, label %83
+
+; <label>:82:                                     ; preds = %cleanup23.loopexit
+  call void @__sanitizer_cov(i32* inttoptr (i64 add (i64 ptrtoint ([9 x i32]* @__sancov_gen_cov to i64), i64 32) to i32*))
+  call void asm sideeffect "", ""()
+  br label %83
+
+; <label>:83:                                     ; preds = %cleanup23.loopexit, %82
+  br label %cleanup23
+
+cleanup23:                                        ; preds = %83, %enoent
+  ret void
+}
+
+declare i32 @fn3(...) local_unnamed_addr #1
+
+declare i32 @fn4(...) local_unnamed_addr #1
+
+declare i32* @fn1(...) local_unnamed_addr #1
+
+declare void @__sanitizer_cov(i32*)
+
+declare void @__sanitizer_cov_trace_cmp1(i8, i8)
+
+declare void @__sanitizer_cov_trace_cmp4(i32, i32)
+
+declare void @__asan_report_load1(i64)
+
+declare void @__asan_report_load4(i64)
+
+declare void @__asan_report_load8(i64)
+
+declare void @__asan_report_store4(i64)
diff --git a/llvm/test/CodeGen/X86/pre-coalesce.ll b/llvm/test/CodeGen/X86/pre-coalesce.ll
new file mode 100644
index 00000000000..9cd6365453c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pre-coalesce.ll
@@ -0,0 +1,51 @@
+; RUN: llc -regalloc=greedy -mtriple=x86_64-unknown-linux-gnu < %s -o - | FileCheck %s
+;
+; The test checks that no redundant mov like the following is generated in
+; the %while.body loop.
+; .LBB0_2:
+;   movsbl %cl, %ecx
+;   movl %edx, %eax     ==> This movl can be promoted outside of the loop.
+;   shll $5, %eax
+;   ...
+;   movl %eax, %edx
+;   jne .LBB0_2
+;
+; CHECK-LABEL: foo:
+; CHECK: [[L0:.LBB0_[0-9]+]]: # %while.body
+; CHECK: movl %[[REGA:.*]], %[[REGB:.*]]
+; CHECK-NOT: movl %[[REGB]], %[[REGA]]
+; CHECK: jne [[L0]]
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@b = common local_unnamed_addr global i8* null, align 8
+@a = common local_unnamed_addr global i32 0, align 4
+
+define i32 @foo() local_unnamed_addr {
+entry:
+  %t0 = load i8*, i8** @b, align 8
+  %t1 = load i8, i8* %t0, align 1
+  %cmp4 = icmp eq i8 %t1, 0
+  %t2 = load i32, i32* @a, align 4
+  br i1 %cmp4, label %while.end, label %while.body.preheader
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %t3 = phi i32 [ %add3, %while.body ], [ %t2, %while.body.preheader ]
+  %t4 = phi i8 [ %t5, %while.body ], [ %t1, %while.body.preheader ]
+  %conv = sext i8 %t4 to i32
+  %add = mul i32 %t3, 33
+  %add3 = add nsw i32 %add, %conv
+  store i32 %add3, i32* @a, align 4
+  %t5 = load i8, i8* %t0, align 1
+  %cmp = icmp eq i8 %t5, 0
+  br i1 %cmp, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %.lcssa = phi i32 [ %t2, %entry ], [ %add3, %while.end.loopexit ]
+  ret i32 %.lcssa
+}
diff --git a/llvm/test/CodeGen/X86/pre-coalesce.mir b/llvm/test/CodeGen/X86/pre-coalesce.mir
new file mode 100644
index 00000000000..11805fe090b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pre-coalesce.mir
@@ -0,0 +1,122 @@
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass simple-register-coalescing -o - %s | FileCheck %s
+# Check that there is no partially redundant copy left in the loop after
+# register coalescing.
+--- |
+  ; ModuleID = '<stdin>'
+  source_filename = "<stdin>"
+  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-linux-gnu"
+
+  @b = common local_unnamed_addr global i8* null, align 8
+  @a = common local_unnamed_addr global i32 0, align 4
+
+  define i32 @foo() local_unnamed_addr {
+  entry:
+    %t0 = load i8*, i8** @b, align 8
+    %t1 = load i8, i8* %t0, align 1
+    %cmp4 = icmp eq i8 %t1, 0
+    %t2 = load i32, i32* @a, align 4
+    br i1 %cmp4, label %while.end, label %while.body.preheader
+
+  while.body.preheader:                             ; preds = %entry
+    br label %while.body
+
+  while.body:                                       ; preds = %while.body, %while.body.preheader
+    %t3 = phi i32 [ %add3, %while.body ], [ %t2, %while.body.preheader ]
+    %t4 = phi i8 [ %t5, %while.body ], [ %t1, %while.body.preheader ]
+    %conv = sext i8 %t4 to i32
+    %add = mul i32 %t3, 33
+    %add3 = add nsw i32 %add, %conv
+    store i32 %add3, i32* @a, align 4
+    %t5 = load i8, i8* %t0, align 1
+    %cmp = icmp eq i8 %t5, 0
+    br i1 %cmp, label %while.end, label %while.body
+
+  while.end:                                        ; preds = %while.body, %entry
+    %.lcssa = phi i32 [ %t2, %entry ], [ %add3, %while.body ]
+    ret i32 %.lcssa
+  }
+
+...
+---
+# Check that the A = B and B = A copies do not both remain in the loop.
+# CHECK: name: foo
+# CHECK: [[L1:bb.3.while.body]]:
+# CHECK: %[[REGA:.*]] = COPY %[[REGB:.*]]
+# CHECK-NOT: %[[REGB]] = COPY %[[REGA]]
+# CHECK: JNE_1 %[[L1]]
+
+name:            foo
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr64 }
+  - { id: 1, class: gr8 }
+  - { id: 2, class: gr32 }
+  - { id: 3, class: gr32 }
+  - { id: 4, class: gr8 }
+  - { id: 5, class: gr32 }
+  - { id: 6, class: gr8 }
+  - { id: 7, class: gr32 }
+  - { id: 8, class: gr32 }
+  - { id: 9, class: gr32 }
+  - { id: 10, class: gr32 }
+  - { id: 11, class: gr32 }
+  - { id: 12, class: gr8 }
+  - { id: 13, class: gr32 }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0.entry:
+    successors: %bb.4(0x30000000), %bb.1.while.body.preheader(0x50000000)
+
+    %0 = MOV64rm %rip, 1, _, @b, _ :: (dereferenceable load 8 from @b)
+    %12 = MOV8rm %0, 1, _, 0, _ :: (load 1 from %ir.t0)
+    TEST8rr %12, %12, implicit-def %eflags
+    %11 = MOV32rm %rip, 1, _, @a, _ :: (dereferenceable load 4 from @a)
+    JNE_1 %bb.1.while.body.preheader, implicit killed %eflags
+
+  bb.4:
+    successors: %bb.3.while.end(0x80000000)
+
+    %10 = COPY %11
+    JMP_1 %bb.3.while.end
+
+  bb.1.while.body.preheader:
+    successors: %bb.2.while.body(0x80000000)
+
+  bb.2.while.body:
+    successors: %bb.3.while.end(0x04000000), %bb.2.while.body(0x7c000000)
+
+    %8 = MOVSX32rr8 %12
+    %10 = COPY %11
+    %10 = SHL32ri %10, 5, implicit-def dead %eflags
+    %10 = ADD32rr %10, %11, implicit-def dead %eflags
+    %10 = ADD32rr %10, %8, implicit-def dead %eflags
+    MOV32mr %rip, 1, _, @a, _, %10 :: (store 4 into @a)
+    %12 = MOV8rm %0, 1, _, 0, _ :: (load 1 from %ir.t0)
+    TEST8rr %12, %12, implicit-def %eflags
+    %11 = COPY %10
+    JNE_1 %bb.2.while.body, implicit killed %eflags
+    JMP_1 %bb.3.while.end
+
+  bb.3.while.end:
+    %eax = COPY %10
+    RET 0, killed %eax
+
+...
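
As the RUN lines above show, the new tests can be exercised standalone: pre-coalesce.ll and pre-coalesce.mir are FileCheck tests (`llc -regalloc=greedy -mtriple=x86_64-unknown-linux-gnu < pre-coalesce.ll -o - | FileCheck pre-coalesce.ll`, and `llc -mtriple=x86_64-unknown-linux-gnu -run-pass simple-register-coalescing -o - pre-coalesce.mir | FileCheck pre-coalesce.mir`), while pre-coalesce-2.ll carries no CHECK lines and passes as long as `llc -regalloc=greedy -verify-coalescing` reports no live-range verifier error after the new transformation.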