diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
29 files changed, 649 insertions, 699 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp index ca77795ce10..35dd9eb0a47 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp @@ -161,8 +161,8 @@ static bool isWrapperOnlyCall(CallSite CS) { return false; } if (isa<ReturnInst>(*std::next(I->getIterator()))) { - DEBUG(dbgs() << " Wrapper only call detected: " - << Callee->getName() << '\n'); + LLVM_DEBUG(dbgs() << " Wrapper only call detected: " + << Callee->getName() << '\n'); return true; } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index dd45ced6ecc..7a7ed7a4f06 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -765,8 +765,7 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { ArrayRef<double> tmp(DVal); nval = ConstantDataVector::get(context, tmp); } - DEBUG(errs() << "AMDIC: " << *CI - << " ---> " << *nval << "\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); replaceCall(nval); return true; } @@ -776,8 +775,7 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { for (int i = 0; i < sz; ++i) { if (CF->isExactlyValue(ftbl[i].input)) { Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result); - DEBUG(errs() << "AMDIC: " << *CI - << " ---> " << *nval << "\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); replaceCall(nval); return true; } @@ -798,11 +796,11 @@ bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) { AMDGPULibFunc nf = FInfo; nf.setPrefix(AMDGPULibFunc::NATIVE); if (Constant *FPExpr = getFunction(M, nf)) { - DEBUG(dbgs() << "AMDIC: " << *CI << " ---> "); + LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> "); CI->setCalledFunction(FPExpr); - DEBUG(dbgs() << *CI << '\n'); + LLVM_DEBUG(dbgs() << *CI << '\n'); return true; } @@ -820,8 +818,7 @@ bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B, Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0), opr0, "recip2div"); - DEBUG(errs() << "AMDIC: " << *CI - << " ---> " << *nval << "\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); replaceCall(nval); return true; } @@ -899,7 +896,7 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B, if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) { // pow/powr/pown(x, 0) == 1 - DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n"); Constant *cnval = ConstantFP::get(eltType, 1.0); if (getVecSize(FInfo) > 1) { cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); @@ -909,23 +906,21 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B, } if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) { // pow/powr/pown(x, 1.0) = x - DEBUG(errs() << "AMDIC: " << *CI - << " ---> " << *opr0 << "\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n"); replaceCall(opr0); return true; } if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) { // pow/powr/pown(x, 2.0) = x*x - DEBUG(errs() << "AMDIC: " << *CI - << " ---> " << *opr0 << " * " << *opr0 << "\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0 + << "\n"); Value *nval = B.CreateFMul(opr0, opr0, "__pow2"); replaceCall(nval); return true; } if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) { // pow/powr/pown(x, -1.0) = 1.0/x - DEBUG(errs() << "AMDIC: " << *CI - << " ---> 1 / " << *opr0 << "\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n"); Constant *cnval = ConstantFP::get(eltType, 1.0); if (getVecSize(FInfo) > 1) { cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); @@ -942,8 +937,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B, if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT : AMDGPULibFunc::EI_RSQRT, FInfo))) { - DEBUG(errs() << "AMDIC: " << *CI << " ---> " - << FInfo.getName().c_str() << "(" << *opr0 << ")\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " + << FInfo.getName().c_str() << "(" << *opr0 << ")\n"); Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt" : "__pow2rsqrt"); replaceCall(nval); @@ -999,8 +994,9 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B, } nval = B.CreateFDiv(cnval, nval, "__1powprod"); } - DEBUG(errs() << "AMDIC: " << *CI << " ---> " - << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0 << ")\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " + << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0 + << ")\n"); replaceCall(nval); return true; } @@ -1137,8 +1133,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B, nval = B.CreateBitCast(nval, opr0->getType()); } - DEBUG(errs() << "AMDIC: " << *CI << " ---> " - << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " + << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n"); replaceCall(nval); return true; @@ -1155,8 +1151,7 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B, } int ci_opr1 = (int)CINT->getSExtValue(); if (ci_opr1 == 1) { // rootn(x, 1) = x - DEBUG(errs() << "AMDIC: " << *CI - << " ---> " << *opr0 << "\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n"); replaceCall(opr0); return true; } @@ -1166,7 +1161,7 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B, Module *M = CI->getModule(); if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { - DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n"); Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt"); replaceCall(nval); return true; @@ -1175,13 +1170,13 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B, Module *M = CI->getModule(); if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) { - DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n"); Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt"); replaceCall(nval); return true; } } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x - DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n"); Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), opr0, "__rootn2div"); @@ -1193,7 +1188,8 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B, Module *M = CI->getModule(); if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) { - DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0 << ")\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0 + << ")\n"); Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt"); replaceCall(nval); return true; @@ -1212,22 +1208,22 @@ bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B, ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1); if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) { // fma/mad(a, b, c) = c if a=0 || b=0 - DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n"); replaceCall(opr2); return true; } if (CF0 && CF0->isExactlyValue(1.0f)) { // fma/mad(a, b, c) = b+c if a=1 - DEBUG(errs() << "AMDIC: " << *CI << " ---> " - << *opr1 << " + " << *opr2 << "\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2 + << "\n"); Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd"); replaceCall(nval); return true; } if (CF1 && CF1->isExactlyValue(1.0f)) { // fma/mad(a, b, c) = a+c if b=1 - DEBUG(errs() << "AMDIC: " << *CI << " ---> " - << *opr0 << " + " << *opr2 << "\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2 + << "\n"); Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd"); replaceCall(nval); return true; @@ -1235,8 +1231,8 @@ bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B, if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) { if (CF->isZero()) { // fma/mad(a, b, c) = a*b if c=0 - DEBUG(errs() << "AMDIC: " << *CI << " ---> " - << *opr0 << " * " << *opr1 << "\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " + << *opr1 << "\n"); Value *nval = B.CreateFMul(opr0, opr1, "fmamul"); replaceCall(nval); return true; @@ -1263,8 +1259,8 @@ bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B, if (Constant *FPExpr = getNativeFunction( CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { Value *opr0 = CI->getArgOperand(0); - DEBUG(errs() << "AMDIC: " << *CI << " ---> " - << "sqrt(" << *opr0 << ")\n"); + LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " + << "sqrt(" << *opr0 << ")\n"); Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt"); replaceCall(nval); return true; @@ -1355,8 +1351,8 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B, P = B.CreateAddrSpaceCast(Alloc, PTy); CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P); - DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI - << ") with " << *Call << "\n"); + LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with " + << *Call << "\n"); if (!isSin) { // CI->cos, UI->sin B.SetInsertPoint(&*ItOld); @@ -1719,9 +1715,8 @@ bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) { bool Changed = false; auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - DEBUG(dbgs() << "AMDIC: process function "; - F.printAsOperand(dbgs(), false, F.getParent()); - dbgs() << '\n';); + LLVM_DEBUG(dbgs() << "AMDIC: process function "; + F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';); if (!EnablePreLink) Changed |= setFastFlags(F, Options); @@ -1737,8 +1732,8 @@ bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) { Function *Callee = CI->getCalledFunction(); if (Callee == 0) continue; - DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n"; - dbgs().flush()); + LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n"; + dbgs().flush()); if(Simplifier.fold(CI, AA)) Changed = true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp index b1d61fd3ede..612777981f9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -659,7 +659,7 @@ RegionMRT *MRT::buildMRT(MachineFunction &MF, continue; } - DEBUG(dbgs() << "Visiting " << printMBBReference(*MBB) << "\n"); + LLVM_DEBUG(dbgs() << "Visiting " << printMBBReference(*MBB) << "\n"); MBBMRT *NewMBB = new MBBMRT(MBB); MachineRegion *Region = RegionInfo->getRegionFor(MBB); @@ -696,18 +696,19 @@ void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, const TargetRegisterInfo *TRI, PHILinearize &PHIInfo) { if (TRI->isVirtualRegister(Reg)) { - DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI) + << "\n"); // If this is a source register to a PHI we are chaining, it // must be live out. if (PHIInfo.isSource(Reg)) { - DEBUG(dbgs() << "Add LiveOut (PHI): " << printReg(Reg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << "Add LiveOut (PHI): " << printReg(Reg, TRI) << "\n"); addLiveOut(Reg); } else { // If this is live out of the MBB for (auto &UI : MRI->use_operands(Reg)) { if (UI.getParent()->getParent() != MBB) { - DEBUG(dbgs() << "Add LiveOut (MBB " << printMBBReference(*MBB) - << "): " << printReg(Reg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << "Add LiveOut (MBB " << printMBBReference(*MBB) + << "): " << printReg(Reg, TRI) << "\n"); addLiveOut(Reg); } else { // If the use is in the same MBB we have to make sure @@ -718,8 +719,8 @@ void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, MIE = UseInstr->getParent()->instr_end(); MII != MIE; ++MII) { if ((&(*MII)) == DefInstr) { - DEBUG(dbgs() << "Add LiveOut (Loop): " << printReg(Reg, TRI) - << "\n"); + LLVM_DEBUG(dbgs() << "Add LiveOut (Loop): " << printReg(Reg, TRI) + << "\n"); addLiveOut(Reg); } } @@ -735,11 +736,12 @@ void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, const TargetRegisterInfo *TRI, PHILinearize &PHIInfo) { if (TRI->isVirtualRegister(Reg)) { - DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI) + << "\n"); for (auto &UI : MRI->use_operands(Reg)) { if (!Region->contains(UI.getParent()->getParent())) { - DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region - << "): " << printReg(Reg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region + << "): " << printReg(Reg, TRI) << "\n"); addLiveOut(Reg); } } @@ -750,8 +752,8 @@ void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI, PHILinearize &PHIInfo) { - DEBUG(dbgs() << "-Store Live Outs Begin (" << printMBBReference(*MBB) - << ")-\n"); + LLVM_DEBUG(dbgs() << "-Store Live Outs Begin (" << printMBBReference(*MBB) + << ")-\n"); for (auto &II : *MBB) { for (auto &RI : II.defs()) { storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo); @@ -775,9 +777,10 @@ void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, for (int i = 0; i < numPreds; ++i) { if (getPHIPred(PHI, i) == MBB) { unsigned PHIReg = getPHISourceReg(PHI, i); - DEBUG(dbgs() << "Add LiveOut (PhiSource " << printMBBReference(*MBB) - << " -> " << printMBBReference(*(*SI)) - << "): " << printReg(PHIReg, TRI) << "\n"); + LLVM_DEBUG(dbgs() + << "Add LiveOut (PhiSource " << printMBBReference(*MBB) + << " -> " << printMBBReference(*(*SI)) + << "): " << printReg(PHIReg, TRI) << "\n"); addLiveOut(PHIReg); } } @@ -785,7 +788,7 @@ void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, } } - DEBUG(dbgs() << "-Store Live Outs Endn-\n"); + LLVM_DEBUG(dbgs() << "-Store Live Outs Endn-\n"); } void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB, @@ -845,8 +848,8 @@ void LinearizedRegion::storeLiveOuts(RegionMRT *Region, for (int i = 0; i < numPreds; ++i) { if (Region->contains(getPHIPred(PHI, i))) { unsigned PHIReg = getPHISourceReg(PHI, i); - DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region - << "): " << printReg(PHIReg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region + << "): " << printReg(PHIReg, TRI) << "\n"); addLiveOut(PHIReg); } } @@ -910,20 +913,21 @@ void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister, bool IncludeLoopPHI) { assert(Register != NewRegister && "Cannot replace a reg with itself"); - DEBUG(dbgs() << "Pepareing to replace register (region): " - << printReg(Register, MRI->getTargetRegisterInfo()) << " with " - << printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); + LLVM_DEBUG( + dbgs() << "Pepareing to replace register (region): " + << printReg(Register, MRI->getTargetRegisterInfo()) << " with " + << printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); // If we are replacing outside, we also need to update the LiveOuts if (ReplaceOutside && (isLiveOut(Register) || this->getParent()->isLiveOut(Register))) { LinearizedRegion *Current = this; while (Current != nullptr && Current->getEntry() != nullptr) { - DEBUG(dbgs() << "Region before register replace\n"); - DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); + LLVM_DEBUG(dbgs() << "Region before register replace\n"); + LLVM_DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); Current->replaceLiveOut(Register, NewRegister); - DEBUG(dbgs() << "Region after register replace\n"); - DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); + LLVM_DEBUG(dbgs() << "Region after register replace\n"); + LLVM_DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); Current = Current->getParent(); } } @@ -947,16 +951,16 @@ void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister, if (ShouldReplace) { if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { - DEBUG(dbgs() << "Trying to substitute physical register: " - << printReg(NewRegister, MRI->getTargetRegisterInfo()) - << "\n"); + LLVM_DEBUG(dbgs() << "Trying to substitute physical register: " + << printReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); llvm_unreachable("Cannot substitute physical registers"); } else { - DEBUG(dbgs() << "Replacing register (region): " - << printReg(Register, MRI->getTargetRegisterInfo()) - << " with " - << printReg(NewRegister, MRI->getTargetRegisterInfo()) - << "\n"); + LLVM_DEBUG(dbgs() << "Replacing register (region): " + << printReg(Register, MRI->getTargetRegisterInfo()) + << " with " + << printReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); O.setReg(NewRegister); } } @@ -1023,18 +1027,18 @@ void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) { if (hasNoDef(Reg, MRI)) continue; if (!MRI->hasOneDef(Reg)) { - DEBUG(this->getEntry()->getParent()->dump()); - DEBUG(dbgs() << printReg(Reg, TRI) << "\n"); + LLVM_DEBUG(this->getEntry()->getParent()->dump()); + LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << "\n"); } if (MRI->def_begin(Reg) == MRI->def_end()) { - DEBUG(dbgs() << "Register " - << printReg(Reg, MRI->getTargetRegisterInfo()) - << " has NO defs\n"); + LLVM_DEBUG(dbgs() << "Register " + << printReg(Reg, MRI->getTargetRegisterInfo()) + << " has NO defs\n"); } else if (!MRI->hasOneDef(Reg)) { - DEBUG(dbgs() << "Register " - << printReg(Reg, MRI->getTargetRegisterInfo()) - << " has multiple defs\n"); + LLVM_DEBUG(dbgs() << "Register " + << printReg(Reg, MRI->getTargetRegisterInfo()) + << " has multiple defs\n"); } assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); @@ -1042,8 +1046,8 @@ void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) { MachineOperand *UseOperand = &(RI); bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB; if (UseIsOutsideDefMBB && UseOperand->isKill()) { - DEBUG(dbgs() << "Removing kill flag on register: " - << printReg(Reg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << "Removing kill flag on register: " + << printReg(Reg, TRI) << "\n"); UseOperand->setIsKill(false); } } @@ -1416,8 +1420,8 @@ void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) { MachineInstr &Instr = *I; if (Instr.isPHI()) { unsigned PHIDestReg = getPHIDestReg(Instr); - DEBUG(dbgs() << "Extractking killed phi:\n"); - DEBUG(Instr.dump()); + LLVM_DEBUG(dbgs() << "Extractking killed phi:\n"); + LLVM_DEBUG(Instr.dump()); PHIs.insert(&Instr); PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc()); storePHILinearizationInfoDest(PHIDestReg, Instr); @@ -1449,9 +1453,10 @@ bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, MachineBasicBlock *SourceMBB, SmallVector<unsigned, 2> &PHIIndices, unsigned *ReplaceReg) { - DEBUG(dbgs() << "Shrink PHI: "); - DEBUG(PHI.dump()); - DEBUG(dbgs() << " to " << printReg(getPHIDestReg(PHI), TRI) << " = PHI("); + LLVM_DEBUG(dbgs() << "Shrink PHI: "); + LLVM_DEBUG(PHI.dump()); + LLVM_DEBUG(dbgs() << " to " << printReg(getPHIDestReg(PHI), TRI) + << " = PHI("); bool Replaced = false; unsigned NumInputs = getPHINumInputs(PHI); @@ -1481,8 +1486,8 @@ bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, if (SourceMBB) { MIB.addReg(CombinedSourceReg); MIB.addMBB(SourceMBB); - DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", " - << printMBBReference(*SourceMBB)); + LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", " + << printMBBReference(*SourceMBB)); } for (unsigned i = 0; i < NumInputs; ++i) { @@ -1493,10 +1498,10 @@ bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, MachineBasicBlock *SourcePred = getPHIPred(PHI, i); MIB.addReg(SourceReg); MIB.addMBB(SourcePred); - DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " - << printMBBReference(*SourcePred)); + LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " + << printMBBReference(*SourcePred)); } - DEBUG(dbgs() << ")\n"); + LLVM_DEBUG(dbgs() << ")\n"); } PHI.eraseFromParent(); return Replaced; @@ -1505,9 +1510,10 @@ bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, void AMDGPUMachineCFGStructurizer::replacePHI( MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge, SmallVector<unsigned, 2> &PHIRegionIndices) { - DEBUG(dbgs() << "Replace PHI: "); - DEBUG(PHI.dump()); - DEBUG(dbgs() << " with " << printReg(getPHIDestReg(PHI), TRI) << " = PHI("); + LLVM_DEBUG(dbgs() << "Replace PHI: "); + LLVM_DEBUG(PHI.dump()); + LLVM_DEBUG(dbgs() << " with " << printReg(getPHIDestReg(PHI), TRI) + << " = PHI("); bool HasExternalEdge = false; unsigned NumInputs = getPHINumInputs(PHI); @@ -1524,8 +1530,8 @@ void AMDGPUMachineCFGStructurizer::replacePHI( getPHIDestReg(PHI)); MIB.addReg(CombinedSourceReg); MIB.addMBB(LastMerge); - DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", " - << printMBBReference(*LastMerge)); + LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", " + << printMBBReference(*LastMerge)); for (unsigned i = 0; i < NumInputs; ++i) { if (isPHIRegionIndex(PHIRegionIndices, i)) { continue; @@ -1534,10 +1540,10 @@ void AMDGPUMachineCFGStructurizer::replacePHI( MachineBasicBlock *SourcePred = getPHIPred(PHI, i); MIB.addReg(SourceReg); MIB.addMBB(SourcePred); - DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " - << printMBBReference(*SourcePred)); + LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " + << printMBBReference(*SourcePred)); } - DEBUG(dbgs() << ")\n"); + LLVM_DEBUG(dbgs() << ")\n"); } else { replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg); } @@ -1547,9 +1553,9 @@ void AMDGPUMachineCFGStructurizer::replacePHI( void AMDGPUMachineCFGStructurizer::replaceEntryPHI( MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB, SmallVector<unsigned, 2> &PHIRegionIndices) { - DEBUG(dbgs() << "Replace entry PHI: "); - DEBUG(PHI.dump()); - DEBUG(dbgs() << " with "); + LLVM_DEBUG(dbgs() << "Replace entry PHI: "); + LLVM_DEBUG(PHI.dump()); + LLVM_DEBUG(dbgs() << " with "); unsigned NumInputs = getPHINumInputs(PHI); unsigned NumNonRegionInputs = NumInputs; @@ -1562,18 +1568,19 @@ void AMDGPUMachineCFGStructurizer::replaceEntryPHI( if (NumNonRegionInputs == 0) { auto DestReg = getPHIDestReg(PHI); replaceRegisterWith(DestReg, CombinedSourceReg); - DEBUG(dbgs() << " register " << printReg(CombinedSourceReg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << " register " << printReg(CombinedSourceReg, TRI) + << "\n"); PHI.eraseFromParent(); } else { - DEBUG(dbgs() << printReg(getPHIDestReg(PHI), TRI) << " = PHI("); + LLVM_DEBUG(dbgs() << printReg(getPHIDestReg(PHI), TRI) << " = PHI("); MachineBasicBlock *MBB = PHI.getParent(); MachineInstrBuilder MIB = BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), getPHIDestReg(PHI)); MIB.addReg(CombinedSourceReg); MIB.addMBB(IfMBB); - DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", " - << printMBBReference(*IfMBB)); + LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", " + << printMBBReference(*IfMBB)); unsigned NumInputs = getPHINumInputs(PHI); for (unsigned i = 0; i < NumInputs; ++i) { if (isPHIRegionIndex(PHIRegionIndices, i)) { @@ -1583,10 +1590,10 @@ void AMDGPUMachineCFGStructurizer::replaceEntryPHI( MachineBasicBlock *SourcePred = getPHIPred(PHI, i); MIB.addReg(SourceReg); MIB.addMBB(SourcePred); - DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " - << printMBBReference(*SourcePred)); + LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " + << printMBBReference(*SourcePred)); } - DEBUG(dbgs() << ")\n"); + LLVM_DEBUG(dbgs() << ")\n"); PHI.eraseFromParent(); } } @@ -1608,8 +1615,9 @@ void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs( } } - DEBUG(dbgs() << "Register " << printReg(Reg, TRI) << " is " - << (IsDead ? "dead" : "alive") << " after PHI replace\n"); + LLVM_DEBUG(dbgs() << "Register " << printReg(Reg, TRI) << " is " + << (IsDead ? "dead" : "alive") + << " after PHI replace\n"); if (IsDead) { LRegion->removeLiveOut(Reg); } @@ -1683,8 +1691,8 @@ void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Regi void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB, MachineBasicBlock *Dest, const DebugLoc &DL) { - DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber() - << " -> " << Dest->getNumber() << "\n"); + LLVM_DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber() + << " -> " << Dest->getNumber() << "\n"); MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator(); bool HasTerminator = Terminator != MBB->instr_end(); if (HasTerminator) { @@ -1733,7 +1741,8 @@ AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) { MF->insert(ExitIter, LastMerge); LastMerge->addSuccessor(Exit); insertUnconditionalBranch(LastMerge, Exit); - DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() << "\n"); + LLVM_DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() + << "\n"); } return LastMerge; } @@ -1749,11 +1758,12 @@ void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB, if (MergeBB->succ_begin() == MergeBB->succ_end()) { return; } - DEBUG(dbgs() << "Merge PHI (" << printMBBReference(*MergeBB) - << "): " << printReg(DestRegister, TRI) << " = PHI(" - << printReg(IfSourceRegister, TRI) << ", " - << printMBBReference(*IfBB) << printReg(CodeSourceRegister, TRI) - << ", " << printMBBReference(*CodeBB) << ")\n"); + LLVM_DEBUG(dbgs() << "Merge PHI (" << printMBBReference(*MergeBB) + << "): " << printReg(DestRegister, TRI) << " = PHI(" + << printReg(IfSourceRegister, TRI) << ", " + << printMBBReference(*IfBB) + << printReg(CodeSourceRegister, TRI) << ", " + << printMBBReference(*CodeBB) << ")\n"); const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin()); MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL, TII->get(TargetOpcode::PHI), DestRegister); @@ -1811,8 +1821,8 @@ static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, for (auto SI : Succs) { std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI; - DEBUG(dbgs() << "Removing edge: " << printMBBReference(*Edge.first) - << " -> " << printMBBReference(*Edge.second) << "\n"); + LLVM_DEBUG(dbgs() << "Removing edge: " << printMBBReference(*Edge.first) + << " -> " << printMBBReference(*Edge.second) << "\n"); Edge.first->removeSuccessor(Edge.second); } } @@ -1845,13 +1855,13 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock( IfBB->addSuccessor(MergeBB); IfBB->addSuccessor(CodeBBStart); - DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n"); + LLVM_DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n"); // Ensure that the MergeBB is a successor of the CodeEndBB. if (!CodeBBEnd->isSuccessor(MergeBB)) CodeBBEnd->addSuccessor(MergeBB); - DEBUG(dbgs() << "Moved " << printMBBReference(*CodeBBStart) << " through " - << printMBBReference(*CodeBBEnd) << "\n"); + LLVM_DEBUG(dbgs() << "Moved " << printMBBReference(*CodeBBStart) + << " through " << printMBBReference(*CodeBBEnd) << "\n"); // If we have a single predecessor we can find a reasonable debug location MachineBasicBlock *SinglePred = @@ -1936,16 +1946,18 @@ void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *Co MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) { if (MRI->def_begin(Reg) == MRI->def_end()) { - DEBUG(dbgs() << "Register " << printReg(Reg, MRI->getTargetRegisterInfo()) - << " has NO defs\n"); + LLVM_DEBUG(dbgs() << "Register " + << printReg(Reg, MRI->getTargetRegisterInfo()) + << " has NO defs\n"); } else if (!MRI->hasOneDef(Reg)) { - DEBUG(dbgs() << "Register " << printReg(Reg, MRI->getTargetRegisterInfo()) - << " has multiple defs\n"); - DEBUG(dbgs() << "DEFS BEGIN:\n"); + LLVM_DEBUG(dbgs() << "Register " + << printReg(Reg, MRI->getTargetRegisterInfo()) + << " has multiple defs\n"); + LLVM_DEBUG(dbgs() << "DEFS BEGIN:\n"); for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) { - DEBUG(DI->getParent()->dump()); + LLVM_DEBUG(DI->getParent()->dump()); } - DEBUG(dbgs() << "DEFS END\n"); + LLVM_DEBUG(dbgs() << "DEFS END\n"); } assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); @@ -1987,7 +1999,7 @@ void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB, const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg); unsigned NextDestReg = MRI->createVirtualRegister(RegClass); bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1; - DEBUG(dbgs() << "Insert Chained PHI\n"); + LLVM_DEBUG(dbgs() << "Insert Chained PHI\n"); insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg, SourceReg, IsLastDef); @@ -2023,16 +2035,16 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, } for (auto LI : OldLiveOuts) { - DEBUG(dbgs() << "LiveOut: " << printReg(LI, TRI)); + LLVM_DEBUG(dbgs() << "LiveOut: " << printReg(LI, TRI)); if (!containsDef(CodeBB, InnerRegion, LI) || (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) { // If the register simly lives through the CodeBB, we don't have // to rewrite anything since the register is not defined in this // part of the code. - DEBUG(dbgs() << "- through"); + LLVM_DEBUG(dbgs() << "- through"); continue; } - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "\n"); unsigned Reg = LI; if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) { // If the register is live out, we do want to create a phi, @@ -2049,12 +2061,12 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, unsigned IfSourceReg = MRI->createVirtualRegister(RegClass); // Create initializer, this value is never used, but is needed // to satisfy SSA. - DEBUG(dbgs() << "Initializer for reg: " << printReg(Reg) << "\n"); + LLVM_DEBUG(dbgs() << "Initializer for reg: " << printReg(Reg) << "\n"); TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(), IfSourceReg, 0); InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI); - DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n"); + LLVM_DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n"); insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg, IfSourceReg, Reg, true); } @@ -2064,22 +2076,22 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, // is a source block for a definition. SmallVector<unsigned, 4> Sources; if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) { - DEBUG(dbgs() << "Inserting PHI Live Out from " << printMBBReference(*CodeBB) - << "\n"); + LLVM_DEBUG(dbgs() << "Inserting PHI Live Out from " + << printMBBReference(*CodeBB) << "\n"); for (auto SI : Sources) { unsigned DestReg; PHIInfo.findDest(SI, CodeBB, DestReg); insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI); } - DEBUG(dbgs() << "Insertion done.\n"); + LLVM_DEBUG(dbgs() << "Insertion done.\n"); } - DEBUG(PHIInfo.dump(MRI)); + LLVM_DEBUG(PHIInfo.dump(MRI)); } void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Before PHI Prune\n"); - DEBUG(PHIInfo.dump(MRI)); + LLVM_DEBUG(dbgs() << "Before PHI Prune\n"); + LLVM_DEBUG(PHIInfo.dump(MRI)); SmallVector<std::tuple<unsigned, unsigned, MachineBasicBlock *>, 4> ElimiatedSources; for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; @@ -2119,8 +2131,8 @@ void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) { PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo), std::get<2>(SourceInfo)); } - DEBUG(dbgs() << "After PHI Prune\n"); - DEBUG(PHIInfo.dump(MRI)); + LLVM_DEBUG(dbgs() << "After PHI Prune\n"); + LLVM_DEBUG(PHIInfo.dump(MRI)); } void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion, @@ -2128,8 +2140,8 @@ void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegio MachineBasicBlock *Entry = CurrentRegion->getEntry(); MachineBasicBlock *Exit = CurrentRegion->getExit(); - DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() - << " Pred: " << (*(Entry->pred_begin()))->getNumber() << "\n"); + LLVM_DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() << " Pred: " + << (*(Entry->pred_begin()))->getNumber() << "\n"); int NumSources = 0; auto SE = PHIInfo.sources_end(DestReg); @@ -2146,7 +2158,7 @@ void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegio const DebugLoc &DL = Entry->findDebugLoc(Entry->begin()); MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL, TII->get(TargetOpcode::PHI), DestReg); - DEBUG(dbgs() << "Entry PHI " << printReg(DestReg, TRI) << " = PHI("); + LLVM_DEBUG(dbgs() << "Entry PHI " << printReg(DestReg, TRI) << " = PHI("); unsigned CurrentBackedgeReg = 0; @@ -2170,19 +2182,19 @@ void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegio BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1)); BackedgePHI.addMBB((*SRI).second); CurrentBackedgeReg = NewBackedgeReg; - DEBUG(dbgs() << "Inserting backedge PHI: " - << printReg(NewBackedgeReg, TRI) << " = PHI(" - << printReg(CurrentBackedgeReg, TRI) << ", " - << printMBBReference(*getPHIPred(*PHIDefInstr, 0)) - << ", " - << printReg(getPHISourceReg(*PHIDefInstr, 1), TRI) - << ", " << printMBBReference(*(*SRI).second)); + LLVM_DEBUG(dbgs() + << "Inserting backedge PHI: " + << printReg(NewBackedgeReg, TRI) << " = PHI(" + << printReg(CurrentBackedgeReg, TRI) << ", " + << printMBBReference(*getPHIPred(*PHIDefInstr, 0)) << ", " + << printReg(getPHISourceReg(*PHIDefInstr, 1), TRI) << ", " + << printMBBReference(*(*SRI).second)); } } else { MIB.addReg(SourceReg); MIB.addMBB((*SRI).second); - DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " - << printMBBReference(*(*SRI).second) << ", "); + LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " + << printMBBReference(*(*SRI).second) << ", "); } } @@ -2190,16 +2202,16 @@ void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegio if (CurrentBackedgeReg != 0) { MIB.addReg(CurrentBackedgeReg); MIB.addMBB(Exit); - DEBUG(dbgs() << printReg(CurrentBackedgeReg, TRI) << ", " - << printMBBReference(*Exit) << ")\n"); + LLVM_DEBUG(dbgs() << printReg(CurrentBackedgeReg, TRI) << ", " + << printMBBReference(*Exit) << ")\n"); } else { - DEBUG(dbgs() << ")\n"); + LLVM_DEBUG(dbgs() << ")\n"); } } } void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) { - DEBUG(PHIInfo.dump(MRI)); + LLVM_DEBUG(PHIInfo.dump(MRI)); for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; ++DRI) { @@ -2220,19 +2232,19 @@ void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register, MachineOperand &O = *I; ++I; if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { - DEBUG(dbgs() << "Trying to substitute physical register: " - << printReg(NewRegister, MRI->getTargetRegisterInfo()) - << "\n"); + LLVM_DEBUG(dbgs() << "Trying to substitute physical register: " + << printReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); llvm_unreachable("Cannot substitute physical registers"); // We don't handle physical registers, but if we need to // in the future This is how we do it: // O.substPhysReg(NewRegister, *TRI); } else { - DEBUG(dbgs() << "Replacing register: " - << printReg(Register, MRI->getTargetRegisterInfo()) - << " with " - << printReg(NewRegister, MRI->getTargetRegisterInfo()) - << "\n"); + LLVM_DEBUG(dbgs() << "Replacing register: " + << printReg(Register, MRI->getTargetRegisterInfo()) + << " with " + << printReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); O.setReg(NewRegister); } } @@ -2240,20 +2252,20 @@ void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register, getRegionMRT()->replaceLiveOutReg(Register, NewRegister); - DEBUG(PHIInfo.dump(MRI)); + LLVM_DEBUG(PHIInfo.dump(MRI)); } void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) { - DEBUG(dbgs() << "Resolve PHI Infos\n"); - DEBUG(PHIInfo.dump(MRI)); + LLVM_DEBUG(dbgs() << "Resolve PHI Infos\n"); + LLVM_DEBUG(PHIInfo.dump(MRI)); for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; ++DRI) { unsigned DestReg = *DRI; - DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI) << "\n"); auto SRI = PHIInfo.sources_begin(DestReg); unsigned SourceReg = (*SRI).first; - DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI) - << " SourceReg: " << printReg(SourceReg, TRI) << "\n"); + LLVM_DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI) + << " SourceReg: " << printReg(SourceReg, TRI) << "\n"); assert(PHIInfo.sources_end(DestReg) == ++SRI && "More than one phi source in entry node"); @@ -2327,9 +2339,9 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); ArrayRef<MachineOperand> Cond(RegOp); - DEBUG(dbgs() << "RegionExitReg: "); - DEBUG(Cond[0].print(dbgs(), TRI)); - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "RegionExitReg: "); + LLVM_DEBUG(Cond[0].print(dbgs(), TRI)); + LLVM_DEBUG(dbgs() << "\n"); TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, Cond, DebugLoc()); RegionExit->addSuccessor(CurrentRegion->getEntry()); @@ -2339,12 +2351,12 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo); InnerRegion.setParent(CurrentRegion); - DEBUG(dbgs() << "Insert BB Select PHI (BB)\n"); + LLVM_DEBUG(dbgs() << "Insert BB Select PHI (BB)\n"); insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn, CodeBBSelectReg); InnerRegion.addMBB(MergeBB); - DEBUG(InnerRegion.print(dbgs(), TRI)); + LLVM_DEBUG(InnerRegion.print(dbgs(), TRI)); rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion); extractKilledPHIs(CodeBB); if (IsRegionEntryBB) { @@ -2385,16 +2397,16 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( CurrentRegion->getRegionMRT()->getEntry()->getNumber()); MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); ArrayRef<MachineOperand> Cond(RegOp); - DEBUG(dbgs() << "RegionExitReg: "); - DEBUG(Cond[0].print(dbgs(), TRI)); - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "RegionExitReg: "); + LLVM_DEBUG(Cond[0].print(dbgs(), TRI)); + LLVM_DEBUG(dbgs() << "\n"); TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, Cond, DebugLoc()); RegionExit->addSuccessor(IfBB); } } CurrentRegion->addMBBs(InnerRegion); - DEBUG(dbgs() << "Insert BB Select PHI (region)\n"); + LLVM_DEBUG(dbgs() << "Insert BB Select PHI (region)\n"); insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn, CodeBBSelectReg); @@ -2440,15 +2452,16 @@ void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI, MachineInstrBuilder MIB = BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), NewDestReg); - DEBUG(dbgs() << "Split Entry PHI " << printReg(NewDestReg, TRI) << " = PHI("); + LLVM_DEBUG(dbgs() << "Split Entry PHI " << printReg(NewDestReg, TRI) + << " = PHI("); MIB.addReg(PHISource); MIB.addMBB(Entry); - DEBUG(dbgs() << printReg(PHISource, TRI) << ", " - << printMBBReference(*Entry)); + LLVM_DEBUG(dbgs() << printReg(PHISource, TRI) << ", " + << printMBBReference(*Entry)); MIB.addReg(RegionSourceReg); MIB.addMBB(RegionSourceMBB); - DEBUG(dbgs() << " ," << printReg(RegionSourceReg, TRI) << ", " - << printMBBReference(*RegionSourceMBB) << ")\n"); + LLVM_DEBUG(dbgs() << " ," << printReg(RegionSourceReg, TRI) << ", " + << printMBBReference(*RegionSourceMBB) << ")\n"); } void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry, @@ -2481,7 +2494,8 @@ AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) { LRegion->addMBB(NewExit); LRegion->setExit(NewExit); - DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber() << "\n"); + LLVM_DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber() + << "\n"); // Replace any PHI Predecessors in the successor with NewExit for (auto &II : *Succ) { @@ -2529,9 +2543,9 @@ AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) { MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI()); MachineBasicBlock *Exit = LRegion->getExit(); - DEBUG(dbgs() << "Split " << printMBBReference(*Entry) << " to " - << printMBBReference(*Entry) << " -> " - << printMBBReference(*EntrySucc) << "\n"); + LLVM_DEBUG(dbgs() << "Split " << printMBBReference(*Entry) << " to " + << printMBBReference(*Entry) << " -> " + << printMBBReference(*EntrySucc) << "\n"); LRegion->addMBB(EntrySucc); // Make the backedge go to Entry Succ @@ -2622,21 +2636,21 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { rewriteRegionExitPHIs(Region, LastMerge, LRegion); removeOldExitPreds(Region); - DEBUG(PHIInfo.dump(MRI)); + LLVM_DEBUG(PHIInfo.dump(MRI)); SetVector<MRT *> *Children = Region->getChildren(); - DEBUG(dbgs() << "===========If Region Start===============\n"); + LLVM_DEBUG(dbgs() << "===========If Region Start===============\n"); if (LRegion->getHasLoop()) { - DEBUG(dbgs() << "Has Backedge: Yes\n"); + LLVM_DEBUG(dbgs() << "Has Backedge: Yes\n"); } else { - DEBUG(dbgs() << "Has Backedge: No\n"); + LLVM_DEBUG(dbgs() << "Has Backedge: No\n"); } unsigned BBSelectRegIn; unsigned BBSelectRegOut; for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) { - DEBUG(dbgs() << "CurrentRegion: \n"); - DEBUG(LRegion->print(dbgs(), TRI)); + LLVM_DEBUG(dbgs() << "CurrentRegion: \n"); + LLVM_DEBUG(LRegion->print(dbgs(), TRI)); auto CNI = CI; ++CNI; @@ -2650,9 +2664,9 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { // We found the block is the exit of an inner region, we need // to put it in the current linearized region. - DEBUG(dbgs() << "Linearizing region: "); - DEBUG(InnerLRegion->print(dbgs(), TRI)); - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Linearizing region: "); + LLVM_DEBUG(InnerLRegion->print(dbgs(), TRI)); + LLVM_DEBUG(dbgs() << "\n"); MachineBasicBlock *InnerEntry = InnerLRegion->getEntry(); if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) { @@ -2670,10 +2684,10 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { BBSelectRegOut = Child->getBBSelectRegOut(); BBSelectRegIn = Child->getBBSelectRegIn(); - DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI) - << "\n"); - DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI) - << "\n"); + LLVM_DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI) + << "\n"); + LLVM_DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI) + << "\n"); MachineBasicBlock *IfEnd = CurrentMerge; CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion, @@ -2682,7 +2696,7 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); } else { MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB(); - DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n"); + LLVM_DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n"); if (MBB == getSingleExitNode(*(MBB->getParent()))) { // If this is the exit block then we need to skip to the next. @@ -2694,10 +2708,10 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { BBSelectRegOut = Child->getBBSelectRegOut(); BBSelectRegIn = Child->getBBSelectRegIn(); - DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI) - << "\n"); - DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI) - << "\n"); + LLVM_DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI) + << "\n"); + LLVM_DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI) + << "\n"); MachineBasicBlock *IfEnd = CurrentMerge; // This is a basic block that is not part of an inner region, we @@ -2708,7 +2722,7 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); } - DEBUG(PHIInfo.dump(MRI)); + LLVM_DEBUG(PHIInfo.dump(MRI)); } } @@ -2729,7 +2743,7 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { NewInReg, Region->getEntry()->getNumber()); // Need to be careful about updating the registers inside the region. LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI); - DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n"); + LLVM_DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n"); insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc, InnerSelectReg, NewInReg, LRegion->getRegionMRT()->getInnerOutputRegister()); @@ -2741,11 +2755,11 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { TII->insertReturn(*LastMerge); } - DEBUG(Region->getEntry()->getParent()->dump()); - DEBUG(LRegion->print(dbgs(), TRI)); - DEBUG(PHIInfo.dump(MRI)); + LLVM_DEBUG(Region->getEntry()->getParent()->dump()); + LLVM_DEBUG(LRegion->print(dbgs(), TRI)); + LLVM_DEBUG(PHIInfo.dump(MRI)); - DEBUG(dbgs() << "===========If Region End===============\n"); + LLVM_DEBUG(dbgs() << "===========If Region End===============\n"); Region->setLinearizedRegion(LRegion); return true; @@ -2785,12 +2799,12 @@ bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region, } void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) { - DEBUG(dbgs() << "Fallthrough Map:\n"); + LLVM_DEBUG(dbgs() << "Fallthrough Map:\n"); for (auto &MBBI : MF) { MachineBasicBlock *MBB = MBBI.getFallThrough(); if (MBB != nullptr) { - DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> " - << MBB->getNumber() << "\n"); + LLVM_DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> " + << MBB->getNumber() << "\n"); } FallthroughMap[&MBBI] = MBB; } @@ -2801,8 +2815,8 @@ void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region, LinearizedRegion *LRegion = new LinearizedRegion(); if (SelectOut) { LRegion->addLiveOut(SelectOut); - DEBUG(dbgs() << "Add LiveOut (BBSelect): " << printReg(SelectOut, TRI) - << "\n"); + LLVM_DEBUG(dbgs() << "Add LiveOut (BBSelect): " << printReg(SelectOut, TRI) + << "\n"); } LRegion->setRegionMRT(Region); Region->setLinearizedRegion(LRegion); @@ -2864,19 +2878,19 @@ bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) { initFallthroughMap(MF); checkRegOnlyPHIInputs(MF); - DEBUG(dbgs() << "----STRUCTURIZER START----\n"); - DEBUG(MF.dump()); + LLVM_DEBUG(dbgs() << "----STRUCTURIZER START----\n"); + LLVM_DEBUG(MF.dump()); Regions = &(getAnalysis<MachineRegionInfoPass>().getRegionInfo()); - DEBUG(Regions->dump()); + LLVM_DEBUG(Regions->dump()); RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI); setRegionMRT(RTree); initializeSelectRegisters(RTree, 0, MRI, TII); - DEBUG(RTree->dump(TRI)); + LLVM_DEBUG(RTree->dump(TRI)); bool result = structurizeRegions(RTree, true); delete RTree; - DEBUG(dbgs() << "----STRUCTURIZER END----\n"); + LLVM_DEBUG(dbgs() << "----STRUCTURIZER END----\n"); initFallthroughMap(MF); return result; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp index 265104a8643..4ff6c6e01c6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp @@ -114,7 +114,7 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { M.getDataLayout()); F.setName(Name); } - DEBUG(dbgs() << "found enqueued kernel: " << F.getName() << '\n'); + LLVM_DEBUG(dbgs() << "found enqueued kernel: " << F.getName() << '\n'); auto RuntimeHandle = (F.getName() + ".runtime_handle").str(); auto T = Type::getInt8Ty(C)->getPointerTo(AMDGPUAS::GLOBAL_ADDRESS); auto *GV = new GlobalVariable( @@ -124,7 +124,7 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::GLOBAL_ADDRESS, /*IsExternallyInitialized=*/false); - DEBUG(dbgs() << "runtime handle created: " << *GV << '\n'); + LLVM_DEBUG(dbgs() << "runtime handle created: " << *GV << '\n'); for (auto U : F.users()) { auto *UU = &*U; @@ -145,7 +145,7 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { if (F->getCallingConv() != CallingConv::AMDGPU_KERNEL) continue; F->addFnAttr("calls-enqueue-kernel"); - DEBUG(dbgs() << "mark enqueue_kernel caller:" << F->getName() << '\n'); + LLVM_DEBUG(dbgs() << "mark enqueue_kernel caller:" << F->getName() << '\n'); } return Changed; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 7d28c3c8259..9264cb11fb8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -343,13 +343,13 @@ static bool canVectorizeInst(Instruction *Inst, User *User) { static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { if (DisablePromoteAllocaToVector) { - DEBUG(dbgs() << " Promotion alloca to vector is disabled\n"); + LLVM_DEBUG(dbgs() << " Promotion alloca to vector is disabled\n"); return false; } ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType()); - DEBUG(dbgs() << "Alloca candidate for vectorization\n"); + LLVM_DEBUG(dbgs() << "Alloca candidate for vectorization\n"); // FIXME: There is no reason why we can't support larger arrays, we // are just being conservative for now. @@ -359,7 +359,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { AllocaTy->getNumElements() > 16 || AllocaTy->getNumElements() < 2 || !VectorType::isValidElementType(AllocaTy->getElementType())) { - DEBUG(dbgs() << " Cannot convert type to vector\n"); + LLVM_DEBUG(dbgs() << " Cannot convert type to vector\n"); return false; } @@ -380,7 +380,8 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { // If we can't compute a vector index from this GEP, then we can't // promote this alloca to vector. if (!Index) { - DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n'); + LLVM_DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP + << '\n'); return false; } @@ -395,8 +396,8 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { VectorType *VectorTy = arrayTypeToVecType(AllocaTy); - DEBUG(dbgs() << " Converting alloca to vector " - << *AllocaTy << " -> " << *VectorTy << '\n'); + LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> " + << *VectorTy << '\n'); for (Value *V : WorkList) { Instruction *Inst = cast<Instruction>(V); @@ -485,7 +486,8 @@ bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(Value *BaseAlloca, // important part is both must have the same address space at // the end. if (OtherObj != BaseAlloca) { - DEBUG(dbgs() << "Found a binary instruction with another alloca object\n"); + LLVM_DEBUG( + dbgs() << "Found a binary instruction with another alloca object\n"); return false; } @@ -607,8 +609,8 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) { PointerType *PtrTy = dyn_cast<PointerType>(ParamTy); if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) { LocalMemLimit = 0; - DEBUG(dbgs() << "Function has local memory argument. Promoting to " - "local memory disabled.\n"); + LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to " + "local memory disabled.\n"); return false; } } @@ -677,13 +679,12 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) { LocalMemLimit = MaxSizeWithWaveCount; - DEBUG( - dbgs() << F.getName() << " uses " << CurrentLocalMemUsage << " bytes of LDS\n" - << " Rounding size to " << MaxSizeWithWaveCount - << " with a maximum occupancy of " << MaxOccupancy << '\n' - << " and " << (LocalMemLimit - CurrentLocalMemUsage) - << " available for promotion\n" - ); + LLVM_DEBUG(dbgs() << F.getName() << " uses " << CurrentLocalMemUsage + << " bytes of LDS\n" + << " Rounding size to " << MaxSizeWithWaveCount + << " with a maximum occupancy of " << MaxOccupancy << '\n' + << " and " << (LocalMemLimit - CurrentLocalMemUsage) + << " available for promotion\n"); return true; } @@ -700,7 +701,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) { // First try to replace the alloca with a vector Type *AllocaTy = I.getAllocatedType(); - DEBUG(dbgs() << "Trying to promote " << I << '\n'); + LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n'); if (tryPromoteAllocaToVector(&I, AS)) return true; // Promoted to vector. @@ -716,7 +717,9 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) { case CallingConv::SPIR_KERNEL: break; default: - DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n"); + LLVM_DEBUG( + dbgs() + << " promote alloca to LDS not supported with calling convention.\n"); return false; } @@ -745,8 +748,8 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) { NewSize += AllocSize; if (NewSize > LocalMemLimit) { - DEBUG(dbgs() << " " << AllocSize - << " bytes of local memory not available to promote\n"); + LLVM_DEBUG(dbgs() << " " << AllocSize + << " bytes of local memory not available to promote\n"); return false; } @@ -755,11 +758,11 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) { std::vector<Value*> WorkList; if (!collectUsesWithPtrTypes(&I, &I, WorkList)) { - DEBUG(dbgs() << " Do not know how to convert all uses\n"); + LLVM_DEBUG(dbgs() << " Do not know how to convert all uses\n"); return false; } - DEBUG(dbgs() << "Promoting alloca to local memory\n"); + LLVM_DEBUG(dbgs() << "Promoting alloca to local memory\n"); Function *F = I.getParent()->getParent(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp index 83e56a9ab49..a861762a8c9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp @@ -249,8 +249,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { SmallVector<Argument *, 4> OutArgs; for (Argument &Arg : F.args()) { if (isOutArgumentCandidate(Arg)) { - DEBUG(dbgs() << "Found possible out argument " << Arg - << " in function " << F.getName() << '\n'); + LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg + << " in function " << F.getName() << '\n'); OutArgs.push_back(&Arg); } } @@ -310,7 +310,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { SI = dyn_cast<StoreInst>(Q.getInst()); if (SI) { - DEBUG(dbgs() << "Found out argument store: " << *SI << '\n'); + LLVM_DEBUG(dbgs() << "Found out argument store: " << *SI << '\n'); ReplaceableStores.emplace_back(RI, SI); } else { ThisReplaceable = false; @@ -328,7 +328,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { if (llvm::find_if(ValVec, [OutArg](const std::pair<Argument *, Value *> &Entry) { return Entry.first == OutArg;}) != ValVec.end()) { - DEBUG(dbgs() << "Saw multiple out arg stores" << *OutArg << '\n'); + LLVM_DEBUG(dbgs() + << "Saw multiple out arg stores" << *OutArg << '\n'); // It is possible to see stores to the same argument multiple times, // but we expect these would have been optimized out already. ThisReplaceable = false; @@ -358,7 +359,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { F.getFunctionType()->params(), F.isVarArg()); - DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n'); + LLVM_DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n'); Function *NewFunc = Function::Create(NewFuncTy, Function::PrivateLinkage, F.getName() + ".body"); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 3ea7a82ea7a..ce17f027b52 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -124,8 +124,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, continue; if (dependsOnLocalPhi(L, Br->getCondition())) { UP.Threshold += UnrollThresholdIf; - DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold - << " for loop:\n" << *L << " due to " << *Br << '\n'); + LLVM_DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold + << " for loop:\n" + << *L << " due to " << *Br << '\n'); if (UP.Threshold >= MaxBoost) return; } @@ -201,8 +202,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, // Don't use the maximum allowed value here as it will make some // programs way too big. UP.Threshold = Threshold; - DEBUG(dbgs() << "Set unroll threshold " << Threshold << " for loop:\n" - << *L << " due to " << *GEP << '\n'); + LLVM_DEBUG(dbgs() << "Set unroll threshold " << Threshold + << " for loop:\n" + << *L << " due to " << *GEP << '\n'); if (UP.Threshold >= MaxBoost) return; } diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp index 1793de63b59..cd1801a8244 100644 --- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -78,23 +78,18 @@ namespace { // //===----------------------------------------------------------------------===// -#define SHOWNEWINSTR(i) \ - DEBUG(dbgs() << "New instr: " << *i << "\n"); - -#define SHOWNEWBLK(b, msg) \ -DEBUG( \ - dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ - dbgs() << "\n"; \ -); - -#define SHOWBLK_DETAIL(b, msg) \ -DEBUG( \ - if (b) { \ - dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ - b->print(dbgs()); \ - dbgs() << "\n"; \ - } \ -); +#define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n"); + +#define SHOWNEWBLK(b, msg) \ + LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ + dbgs() << "\n";); + +#define SHOWBLK_DETAIL(b, msg) \ + LLVM_DEBUG(if (b) { \ + dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ + b->print(dbgs()); \ + dbgs() << "\n"; \ + }); #define INVALIDSCCNUM -1 @@ -158,19 +153,19 @@ public: bool runOnMachineFunction(MachineFunction &MF) override { TII = MF.getSubtarget<R600Subtarget>().getInstrInfo(); TRI = &TII->getRegisterInfo(); - DEBUG(MF.dump();); + LLVM_DEBUG(MF.dump();); OrderedBlks.clear(); Visited.clear(); FuncRep = &MF; MLI = &getAnalysis<MachineLoopInfo>(); - DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); + LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); MDT = &getAnalysis<MachineDominatorTree>(); - DEBUG(MDT->print(dbgs(), (const Module*)nullptr);); + LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr);); PDT = &getAnalysis<MachinePostDominatorTree>(); - DEBUG(PDT->print(dbgs());); + LLVM_DEBUG(PDT->print(dbgs());); prepare(); run(); - DEBUG(MF.dump();); + LLVM_DEBUG(MF.dump();); return true; } @@ -650,9 +645,8 @@ bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { if (MI) assert(IsReturn); else if (IsReturn) - DEBUG( - dbgs() << "BB" << MBB->getNumber() - <<" is return block without RETURN instr\n";); + LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber() + << " is return block without RETURN instr\n";); return IsReturn; } @@ -714,7 +708,7 @@ bool AMDGPUCFGStructurizer::prepare() { //FIXME: if not reducible flow graph, make it so ??? - DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";); + LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";); orderBlocks(FuncRep); @@ -757,14 +751,14 @@ bool AMDGPUCFGStructurizer::prepare() { bool AMDGPUCFGStructurizer::run() { //Assume reducible CFG... - DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n"); + LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n"); #ifdef STRESSTEST //Use the worse block ordering to test the algorithm. ReverseVector(orderedBlks); #endif - DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks();); + LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks();); int NumIter = 0; bool Finish = false; MachineBasicBlock *MBB; @@ -774,10 +768,8 @@ bool AMDGPUCFGStructurizer::run() { do { ++NumIter; - DEBUG( - dbgs() << "numIter = " << NumIter - << ", numRemaintedBlk = " << NumRemainedBlk << "\n"; - ); + LLVM_DEBUG(dbgs() << "numIter = " << NumIter + << ", numRemaintedBlk = " << NumRemainedBlk << "\n";); SmallVectorImpl<MachineBasicBlock *>::const_iterator It = OrderedBlks.begin(); @@ -799,10 +791,8 @@ bool AMDGPUCFGStructurizer::run() { SccBeginMBB = MBB; SccNumIter = 0; SccNumBlk = NumRemainedBlk; // Init to maximum possible number. - DEBUG( - dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB); - dbgs() << "\n"; - ); + LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB); + dbgs() << "\n";); } if (!isRetiredBlock(MBB)) @@ -817,20 +807,16 @@ bool AMDGPUCFGStructurizer::run() { ++SccNumIter; int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It); if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) { - DEBUG( - dbgs() << "Can't reduce SCC " << getSCCNum(MBB) - << ", sccNumIter = " << SccNumIter; - dbgs() << "doesn't make any progress\n"; - ); + LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB) + << ", sccNumIter = " << SccNumIter; + dbgs() << "doesn't make any progress\n";); ContNextScc = true; } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) { SccNumBlk = sccRemainedNumBlk; It = SccBeginIter; ContNextScc = false; - DEBUG( - dbgs() << "repeat processing SCC" << getSCCNum(MBB) - << "sccNumIter = " << SccNumIter << '\n'; - ); + LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB) + << "sccNumIter = " << SccNumIter << '\n';); } else { // Finish the current scc. ContNextScc = true; @@ -848,9 +834,7 @@ bool AMDGPUCFGStructurizer::run() { *GraphTraits<MachineFunction *>::nodes_begin(FuncRep); if (EntryMBB->succ_size() == 0) { Finish = true; - DEBUG( - dbgs() << "Reduce to one block\n"; - ); + LLVM_DEBUG(dbgs() << "Reduce to one block\n";); } else { int NewnumRemainedBlk = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end()); @@ -860,9 +844,7 @@ bool AMDGPUCFGStructurizer::run() { NumRemainedBlk = NewnumRemainedBlk; } else { MakeProgress = false; - DEBUG( - dbgs() << "No progress\n"; - ); + LLVM_DEBUG(dbgs() << "No progress\n";); } } } while (!Finish && MakeProgress); @@ -875,9 +857,7 @@ bool AMDGPUCFGStructurizer::run() { It != E; ++It) { if ((*It).second && (*It).second->IsRetired) { assert(((*It).first)->getNumber() != -1); - DEBUG( - dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n"; - ); + LLVM_DEBUG(dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";); (*It).first->eraseFromParent(); //Remove from the parent Function. } delete (*It).second; @@ -886,7 +866,7 @@ bool AMDGPUCFGStructurizer::run() { LLInfoMap.clear(); if (!Finish) { - DEBUG(FuncRep->viewCFG()); + LLVM_DEBUG(FuncRep->viewCFG()); report_fatal_error("IRREDUCIBLE_CFG"); } @@ -920,17 +900,13 @@ int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) { int NumMatch = 0; int CurMatch; - DEBUG( - dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n"; - ); + LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";); while ((CurMatch = patternMatchGroup(MBB)) > 0) NumMatch += CurMatch; - DEBUG( - dbgs() << "End patternMatch BB" << MBB->getNumber() - << ", numMatch = " << NumMatch << "\n"; - ); + LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber() + << ", numMatch = " << NumMatch << "\n";); return NumMatch; } @@ -1050,7 +1026,7 @@ int AMDGPUCFGStructurizer::loopendPatternMatch() { for (MachineLoop *ExaminedLoop : NestedLoops) { if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop]) continue; - DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump();); + LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump();); int NumBreak = mergeLoop(ExaminedLoop); if (NumBreak == -1) break; @@ -1064,7 +1040,8 @@ int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) { MBBVector ExitingMBBs; LoopRep->getExitingBlocks(ExitingMBBs); assert(!ExitingMBBs.empty() && "Infinite Loop not supported"); - DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";); + LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() + << " exiting blocks\n";); // We assume a single ExitBlk MBBVector ExitBlks; LoopRep->getExitBlocks(ExitBlks); @@ -1106,11 +1083,9 @@ bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak( if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) { MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; if (TheEntry) { - DEBUG( - dbgs() << "isLoopContBreakBlock yes src1 = BB" - << Src1MBB->getNumber() - << " src2 = BB" << Src2MBB->getNumber() << "\n"; - ); + LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB" + << Src1MBB->getNumber() << " src2 = BB" + << Src2MBB->getNumber() << "\n";); return true; } } @@ -1122,9 +1097,8 @@ int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); if (Num == 0) { - DEBUG( - dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; - ); + LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" + << "\n";); Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); } return Num; @@ -1138,22 +1112,16 @@ int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, //trueBlk could be the common post dominator DownBlk = TrueMBB; - DEBUG( - dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() - << " true = BB" << TrueMBB->getNumber() - << ", numSucc=" << TrueMBB->succ_size() - << " false = BB" << FalseMBB->getNumber() << "\n"; - ); + LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() + << " true = BB" << TrueMBB->getNumber() + << ", numSucc=" << TrueMBB->succ_size() << " false = BB" + << FalseMBB->getNumber() << "\n";); while (DownBlk) { - DEBUG( - dbgs() << "check down = BB" << DownBlk->getNumber(); - ); + LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber();); if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { - DEBUG( - dbgs() << " working\n"; - ); + LLVM_DEBUG(dbgs() << " working\n";); Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); @@ -1166,9 +1134,7 @@ int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, break; } - DEBUG( - dbgs() << " not working\n"; - ); + LLVM_DEBUG(dbgs() << " not working\n";); DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; } // walk down the postDomTree @@ -1247,10 +1213,9 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) MigrateFalse = true; - DEBUG( - dbgs() << "before improveSimpleJumpintoIf: "; - showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); - ); + LLVM_DEBUG( + dbgs() << "before improveSimpleJumpintoIf: "; + showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);); // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk // @@ -1385,10 +1350,9 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, report_fatal_error("Extra register needed to handle CFG"); } } - DEBUG( - dbgs() << "result from improveSimpleJumpintoIf: "; - showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); - ); + LLVM_DEBUG( + dbgs() << "result from improveSimpleJumpintoIf: "; + showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);); // update landBlk *LandMBBPtr = LandBlk; @@ -1398,10 +1362,8 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, MachineBasicBlock *SrcMBB) { - DEBUG( - dbgs() << "serialPattern BB" << DstMBB->getNumber() - << " <= BB" << SrcMBB->getNumber() << "\n"; - ); + LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB" + << SrcMBB->getNumber() << "\n";); DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); DstMBB->removeSuccessor(SrcMBB, true); @@ -1416,26 +1378,15 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { assert (TrueMBB); - DEBUG( - dbgs() << "ifPattern BB" << MBB->getNumber(); - dbgs() << "{ "; - if (TrueMBB) { - dbgs() << "BB" << TrueMBB->getNumber(); - } - dbgs() << " } else "; - dbgs() << "{ "; - if (FalseMBB) { - dbgs() << "BB" << FalseMBB->getNumber(); - } - dbgs() << " }\n "; - dbgs() << "landBlock: "; - if (!LandMBB) { - dbgs() << "NULL"; - } else { - dbgs() << "BB" << LandMBB->getNumber(); - } - dbgs() << "\n"; - ); + LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{ "; + if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs() + << " } else "; + dbgs() << "{ "; if (FalseMBB) { + dbgs() << "BB" << FalseMBB->getNumber(); + } dbgs() << " }\n "; + dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else { + dbgs() << "BB" << LandMBB->getNumber(); + } dbgs() << "\n";); int OldOpcode = BranchMI->getOpcode(); DebugLoc BranchDL = BranchMI->getDebugLoc(); @@ -1481,8 +1432,8 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, MachineBasicBlock *LandMBB) { - DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() - << " land = BB" << LandMBB->getNumber() << "\n";); + LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() + << " land = BB" << LandMBB->getNumber() << "\n";); insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc()); insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc()); @@ -1491,8 +1442,9 @@ void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, MachineBasicBlock *LandMBB) { - DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber() - << " land = BB" << LandMBB->getNumber() << "\n";); + LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB" + << ExitingMBB->getNumber() << " land = BB" + << LandMBB->getNumber() << "\n";); MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB); assert(BranchMI && isCondBranch(BranchMI)); DebugLoc DL = BranchMI->getDebugLoc(); @@ -1511,9 +1463,9 @@ void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, MachineBasicBlock *ContMBB) { - DEBUG(dbgs() << "settleLoopcontBlock conting = BB" - << ContingMBB->getNumber() - << ", cont = BB" << ContMBB->getNumber() << "\n";); + LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB" + << ContingMBB->getNumber() << ", cont = BB" + << ContMBB->getNumber() << "\n";); MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB); if (MI) { @@ -1587,10 +1539,9 @@ AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, numClonedInstr += MBB->size(); - DEBUG( - dbgs() << "Cloned block: " << "BB" - << MBB->getNumber() << "size " << MBB->size() << "\n"; - ); + LLVM_DEBUG(dbgs() << "Cloned block: " + << "BB" << MBB->getNumber() << "size " << MBB->size() + << "\n";); SHOWNEWBLK(CloneMBB, "result of Cloned block: "); @@ -1603,26 +1554,22 @@ void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, //look for the input branchinstr, not the AMDGPU branchinstr MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); if (!BranchMI) { - DEBUG( - dbgs() << "migrateInstruction don't see branch instr\n"; - ); + LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";); SpliceEnd = SrcMBB->end(); } else { - DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI); + LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI); SpliceEnd = BranchMI; } - DEBUG( - dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size() - << "srcSize = " << SrcMBB->size() << "\n"; - ); + LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = " + << DstMBB->size() << "srcSize = " << SrcMBB->size() + << "\n";); //splice insert before insertPos DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd); - DEBUG( - dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size() - << "srcSize = " << SrcMBB->size() << '\n'; - ); + LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = " + << DstMBB->size() << "srcSize = " << SrcMBB->size() + << '\n';); } MachineBasicBlock * @@ -1640,7 +1587,7 @@ AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); FuncRep->push_back(DummyExitBlk); //insert to function SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); - DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); + LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext(); Ctx.emitError("Extra register needed to handle CFG"); return nullptr; @@ -1653,7 +1600,7 @@ void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. while ((BranchMI = getLoopendBlockBranchInstr(MBB)) && isUncondBranch(BranchMI)) { - DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI); + LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI); BranchMI->eraseFromParent(); } } @@ -1669,7 +1616,7 @@ void AMDGPUCFGStructurizer::removeRedundantConditionalBranch( MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); assert(BranchMI && isCondBranch(BranchMI)); - DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI); + LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI); BranchMI->eraseFromParent(); SHOWNEWBLK(MBB1, "Removing redundant successor"); MBB->removeSuccessor(MBB1, true); @@ -1688,10 +1635,8 @@ void AMDGPUCFGStructurizer::addDummyExitBlock( if (MI) MI->eraseFromParent(); MBB->addSuccessor(DummyExitBlk); - DEBUG( - dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() - << " successors\n"; - ); + LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() + << " successors\n";); } SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); } @@ -1710,9 +1655,7 @@ void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, } void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { - DEBUG( - dbgs() << "Retiring BB" << MBB->getNumber() << "\n"; - ); + LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";); BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; diff --git a/llvm/lib/Target/AMDGPU/GCNILPSched.cpp b/llvm/lib/Target/AMDGPU/GCNILPSched.cpp index ba8211b189c..651091d4413 100644 --- a/llvm/lib/Target/AMDGPU/GCNILPSched.cpp +++ b/llvm/lib/Target/AMDGPU/GCNILPSched.cpp @@ -149,9 +149,9 @@ static int BUCompareLatency(const SUnit *left, const SUnit *right) { int LDepth = left->getDepth(); int RDepth = right->getDepth(); if (LDepth != RDepth) { - DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum - << ") depth " << LDepth << " vs SU (" << right->NodeNum - << ") depth " << RDepth << "\n"); + LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum + << ") depth " << LDepth << " vs SU (" << right->NodeNum + << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } if (left->Latency != right->Latency) @@ -169,9 +169,9 @@ const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right) if (!DisableSchedCriticalPath) { int spread = (int)left->getDepth() - (int)right->getDepth(); if (std::abs(spread) > MaxReorderWindow) { - DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " - << left->getDepth() << " != SU(" << right->NodeNum << "): " - << right->getDepth() << "\n"); + LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " + << left->getDepth() << " != SU(" << right->NodeNum + << "): " << right->getDepth() << "\n"); return left->getDepth() < right->getDepth() ? right : left; } } @@ -324,19 +324,18 @@ GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots, if (AvailQueue.empty()) break; - DEBUG( - dbgs() << "\n=== Picking candidate\n" - "Ready queue:"; - for (auto &C : AvailQueue) - dbgs() << ' ' << C.SU->NodeNum; - dbgs() << '\n'; - ); + LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n" + "Ready queue:"; + for (auto &C + : AvailQueue) dbgs() + << ' ' << C.SU->NodeNum; + dbgs() << '\n';); auto C = pickCandidate(); assert(C); AvailQueue.remove(*C); auto SU = C->SU; - DEBUG(dbgs() << "Selected "; SU->dump(&DAG)); + LLVM_DEBUG(dbgs() << "Selected "; SU->dump(&DAG)); advanceToCycle(SU->getHeight()); diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp index 182ce1e4f63..7f0d80f3d77 100644 --- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp @@ -200,8 +200,8 @@ public: void schedule() { assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End); - DEBUG(dbgs() << "\nScheduling "; - printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2)); + LLVM_DEBUG(dbgs() << "\nScheduling "; + printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2)); Sch.BaseClass::schedule(); // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore @@ -311,14 +311,13 @@ void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overriden void GCNIterativeScheduler::schedule() { // overriden // do nothing - DEBUG( - printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS); - if (!Regions.empty() && Regions.back()->Begin == RegionBegin) { - dbgs() << "Max RP: "; - Regions.back()->MaxPressure.print(dbgs(), &MF.getSubtarget<SISubtarget>()); - } - dbgs() << '\n'; - ); + LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS); + if (!Regions.empty() && Regions.back()->Begin == RegionBegin) { + dbgs() << "Max RP: "; + Regions.back()->MaxPressure.print( + dbgs(), &MF.getSubtarget<SISubtarget>()); + } dbgs() + << '\n';); } void GCNIterativeScheduler::finalizeSchedule() { // overriden @@ -453,22 +452,22 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) { // TODO: assert Regions are sorted descending by pressure const auto &ST = MF.getSubtarget<SISubtarget>(); const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST); - DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc - << ", current = " << Occ << '\n'); + LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc + << ", current = " << Occ << '\n'); auto NewOcc = TargetOcc; for (auto R : Regions) { if (R->MaxPressure.getOccupancy(ST) >= NewOcc) break; - DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3); - printLivenessInfo(dbgs(), R->Begin, R->End, LIS)); + LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3); + printLivenessInfo(dbgs(), R->Begin, R->End, LIS)); BuildDAG DAG(*R, *this); const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this); const auto MaxRP = getSchedulePressure(*R, MinSchedule); - DEBUG(dbgs() << "Occupancy improvement attempt:\n"; - printSchedRP(dbgs(), R->MaxPressure, MaxRP)); + LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n"; + printSchedRP(dbgs(), R->MaxPressure, MaxRP)); NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST)); if (NewOcc <= Occ) @@ -476,8 +475,8 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) { setBestSchedule(*R, MinSchedule, MaxRP); } - DEBUG(dbgs() << "New occupancy = " << NewOcc - << ", prev occupancy = " << Occ << '\n'); + LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc + << ", prev occupancy = " << Occ << '\n'); return std::max(NewOcc, Occ); } @@ -497,8 +496,9 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy( const int NumPasses = Occ < TgtOcc ? 2 : 1; TgtOcc = std::min(Occ, TgtOcc); - DEBUG(dbgs() << "Scheduling using default scheduler, " - "target occupancy = " << TgtOcc << '\n'); + LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, " + "target occupancy = " + << TgtOcc << '\n'); GCNMaxOccupancySchedStrategy LStrgy(Context); for (int I = 0; I < NumPasses; ++I) { @@ -510,16 +510,16 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy( Ovr.schedule(); const auto RP = getRegionPressure(*R); - DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP)); + LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP)); if (RP.getOccupancy(ST) < TgtOcc) { - DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc); + LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc); if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) { - DEBUG(dbgs() << ", scheduling minimal register\n"); + LLVM_DEBUG(dbgs() << ", scheduling minimal register\n"); scheduleBest(*R); } else { - DEBUG(dbgs() << ", restoring\n"); + LLVM_DEBUG(dbgs() << ", restoring\n"); Ovr.restoreOrder(); assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc); } @@ -545,7 +545,7 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) { const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this); const auto RP = getSchedulePressure(*R, MinSchedule); - DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) { + LLVM_DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) { dbgs() << "\nWarning: Pressure becomes worse after minreg!"; printSchedRP(dbgs(), R->MaxPressure, RP); }); @@ -554,7 +554,7 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) { break; scheduleRegion(*R, MinSchedule, RP); - DEBUG(printSchedResult(dbgs(), R, RP)); + LLVM_DEBUG(printSchedResult(dbgs(), R, RP)); MaxPressure = RP; } @@ -577,26 +577,27 @@ void GCNIterativeScheduler::scheduleILP( Occ = tryMaximizeOccupancy(TgtOcc); TgtOcc = std::min(Occ, TgtOcc); - DEBUG(dbgs() << "Scheduling using default scheduler, " - "target occupancy = " << TgtOcc << '\n'); + LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, " + "target occupancy = " + << TgtOcc << '\n'); for (auto R : Regions) { BuildDAG DAG(*R, *this); const auto ILPSchedule = makeGCNILPScheduler(DAG.getBottomRoots(), *this); const auto RP = getSchedulePressure(*R, ILPSchedule); - DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP)); + LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP)); if (RP.getOccupancy(ST) < TgtOcc) { - DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc); + LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc); if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) { - DEBUG(dbgs() << ", scheduling minimal register\n"); + LLVM_DEBUG(dbgs() << ", scheduling minimal register\n"); scheduleBest(*R); } } else { scheduleRegion(*R, ILPSchedule, RP); - DEBUG(printSchedResult(dbgs(), R, RP)); + LLVM_DEBUG(printSchedResult(dbgs(), R, RP)); } } } diff --git a/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp index 9904b5f0f4b..192d534bb9c 100644 --- a/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp @@ -142,35 +142,38 @@ GCNMinRegScheduler::Candidate* GCNMinRegScheduler::pickCandidate() { unsigned Num = RQ.size(); if (Num == 1) break; - DEBUG(dbgs() << "\nSelecting max priority candidates among " << Num << '\n'); + LLVM_DEBUG(dbgs() << "\nSelecting max priority candidates among " << Num + << '\n'); Num = findMax(Num, [=](const Candidate &C) { return C.Priority; }); if (Num == 1) break; - DEBUG(dbgs() << "\nSelecting min non-ready producing candidate among " - << Num << '\n'); + LLVM_DEBUG(dbgs() << "\nSelecting min non-ready producing candidate among " + << Num << '\n'); Num = findMax(Num, [=](const Candidate &C) { auto SU = C.SU; int Res = getNotReadySuccessors(SU); - DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would left non-ready " - << Res << " successors, metric = " << -Res << '\n'); + LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would left non-ready " + << Res << " successors, metric = " << -Res << '\n'); return -Res; }); if (Num == 1) break; - DEBUG(dbgs() << "\nSelecting most producing candidate among " - << Num << '\n'); + LLVM_DEBUG(dbgs() << "\nSelecting most producing candidate among " << Num + << '\n'); Num = findMax(Num, [=](const Candidate &C) { auto SU = C.SU; auto Res = getReadySuccessors(SU); - DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would make ready " - << Res << " successors, metric = " << Res << '\n'); + LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would make ready " << Res + << " successors, metric = " << Res << '\n'); return Res; }); if (Num == 1) break; Num = Num ? Num : RQ.size(); - DEBUG(dbgs() << "\nCan't find best candidate, selecting in program order among " - << Num << '\n'); + LLVM_DEBUG( + dbgs() + << "\nCan't find best candidate, selecting in program order among " + << Num << '\n'); Num = findMax(Num, [=](const Candidate &C) { return -(int64_t)C.SU->NodeNum; }); assert(Num == 1); } while (false); @@ -202,17 +205,17 @@ void GCNMinRegScheduler::bumpPredsPriority(const SUnit *SchedSU, int Priority) { Worklist.push_back(P.getSUnit()); } } - DEBUG(dbgs() << "Make the predecessors of SU(" << SchedSU->NodeNum - << ")'s non-ready successors of " << Priority - << " priority in ready queue: "); + LLVM_DEBUG(dbgs() << "Make the predecessors of SU(" << SchedSU->NodeNum + << ")'s non-ready successors of " << Priority + << " priority in ready queue: "); const auto SetEnd = Set.end(); for (auto &C : RQ) { if (Set.find(C.SU) != SetEnd) { C.Priority = Priority; - DEBUG(dbgs() << " SU(" << C.SU->NodeNum << ')'); + LLVM_DEBUG(dbgs() << " SU(" << C.SU->NodeNum << ')'); } } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); } void GCNMinRegScheduler::releaseSuccessors(const SUnit* SU, int Priority) { @@ -243,19 +246,19 @@ GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots, releaseSuccessors(&DAG.EntrySU, StepNo); while (!RQ.empty()) { - DEBUG( - dbgs() << "\n=== Picking candidate, Step = " << StepNo << "\n" - "Ready queue:"; - for (auto &C : RQ) - dbgs() << ' ' << C.SU->NodeNum << "(P" << C.Priority << ')'; - dbgs() << '\n'; - ); + LLVM_DEBUG(dbgs() << "\n=== Picking candidate, Step = " << StepNo + << "\n" + "Ready queue:"; + for (auto &C + : RQ) dbgs() + << ' ' << C.SU->NodeNum << "(P" << C.Priority << ')'; + dbgs() << '\n';); auto C = pickCandidate(); assert(C); RQ.remove(*C); auto SU = C->SU; - DEBUG(dbgs() << "Selected "; SU->dump(&DAG)); + LLVM_DEBUG(dbgs() << "Selected "; SU->dump(&DAG)); releaseSuccessors(SU, StepNo); Schedule.push_back(SU); diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 12305446fa4..c0a6765df34 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -200,34 +200,30 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) { setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); // See if BotCand is still valid (because we previously scheduled from Top). - DEBUG(dbgs() << "Picking from Bot:\n"); + LLVM_DEBUG(dbgs() << "Picking from Bot:\n"); if (!BotCand.isValid() || BotCand.SU->isScheduled || BotCand.Policy != BotPolicy) { BotCand.reset(CandPolicy()); pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); assert(BotCand.Reason != NoCand && "failed to find the first candidate"); } else { - DEBUG(traceCandidate(BotCand)); + LLVM_DEBUG(traceCandidate(BotCand)); } // Check if the top Q has a better candidate. - DEBUG(dbgs() << "Picking from Top:\n"); + LLVM_DEBUG(dbgs() << "Picking from Top:\n"); if (!TopCand.isValid() || TopCand.SU->isScheduled || TopCand.Policy != TopPolicy) { TopCand.reset(CandPolicy()); pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); assert(TopCand.Reason != NoCand && "failed to find the first candidate"); } else { - DEBUG(traceCandidate(TopCand)); + LLVM_DEBUG(traceCandidate(TopCand)); } // Pick best from BotCand and TopCand. - DEBUG( - dbgs() << "Top Cand: "; - traceCandidate(TopCand); - dbgs() << "Bot Cand: "; - traceCandidate(BotCand); - ); + LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand); + dbgs() << "Bot Cand: "; traceCandidate(BotCand);); SchedCandidate Cand; if (TopCand.Reason == BotCand.Reason) { Cand = BotCand; @@ -256,10 +252,7 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) { } } } - DEBUG( - dbgs() << "Picking: "; - traceCandidate(Cand); - ); + LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand);); IsTopNode = Cand.AtTop; return Cand.SU; @@ -305,7 +298,8 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) { if (SU->isBottomReady()) Bot.removeReady(SU); - DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); + LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " + << *SU->getInstr()); return SU; } @@ -319,7 +313,7 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C, MFI.getMaxWavesPerEU())), MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) { - DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n"); + LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n"); } void GCNScheduleDAGMILive::schedule() { @@ -339,12 +333,12 @@ void GCNScheduleDAGMILive::schedule() { if (LIS) { PressureBefore = Pressure[RegionIdx]; - DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:"; - GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI); - dbgs() << "Region live-in pressure: "; - llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs()); - dbgs() << "Region register pressure: "; - PressureBefore.print(dbgs())); + LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:"; + GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI); + dbgs() << "Region live-in pressure: "; + llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs()); + dbgs() << "Region register pressure: "; + PressureBefore.print(dbgs())); } ScheduleDAGMILive::schedule(); @@ -357,12 +351,13 @@ void GCNScheduleDAGMILive::schedule() { GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; auto PressureAfter = getRealRegPressure(); - DEBUG(dbgs() << "Pressure after scheduling: "; PressureAfter.print(dbgs())); + LLVM_DEBUG(dbgs() << "Pressure after scheduling: "; + PressureAfter.print(dbgs())); if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit && PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit) { Pressure[RegionIdx] = PressureAfter; - DEBUG(dbgs() << "Pressure in desired limits, done.\n"); + LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n"); return; } unsigned WavesAfter = getMaxWaves(PressureAfter.getSGPRNum(), @@ -371,16 +366,16 @@ void GCNScheduleDAGMILive::schedule() { PressureBefore.getVGPRNum(), MF); WavesAfter = std::min(WavesAfter, MFI.getMaxWavesPerEU()); WavesBefore = std::min(WavesBefore, MFI.getMaxWavesPerEU()); - DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore << - ", after " << WavesAfter << ".\n"); + LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore + << ", after " << WavesAfter << ".\n"); // We could not keep current target occupancy because of the just scheduled // region. Record new occupancy for next scheduling cycle. unsigned NewOccupancy = std::max(WavesAfter, WavesBefore); if (NewOccupancy < MinOccupancy) { MinOccupancy = NewOccupancy; - DEBUG(dbgs() << "Occupancy lowered for the function to " - << MinOccupancy << ".\n"); + LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to " + << MinOccupancy << ".\n"); } if (WavesAfter >= WavesBefore) { @@ -388,7 +383,7 @@ void GCNScheduleDAGMILive::schedule() { return; } - DEBUG(dbgs() << "Attempting to revert scheduling.\n"); + LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n"); RegionEnd = RegionBegin; for (MachineInstr *MI : Unsched) { if (MI->isDebugInstr()) @@ -418,7 +413,7 @@ void GCNScheduleDAGMILive::schedule() { } RegionEnd = MI->getIterator(); ++RegionEnd; - DEBUG(dbgs() << "Scheduling " << *MI); + LLVM_DEBUG(dbgs() << "Scheduling " << *MI); } RegionBegin = Unsched.front()->getIterator(); Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd); @@ -493,7 +488,7 @@ void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) { void GCNScheduleDAGMILive::finalizeSchedule() { GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; - DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n"); + LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n"); LiveIns.resize(Regions.size()); Pressure.resize(Regions.size()); @@ -512,9 +507,10 @@ void GCNScheduleDAGMILive::finalizeSchedule() { if (!LIS || StartingOccupancy <= MinOccupancy) break; - DEBUG(dbgs() - << "Retrying function scheduling with lowest recorded occupancy " - << MinOccupancy << ".\n"); + LLVM_DEBUG( + dbgs() + << "Retrying function scheduling with lowest recorded occupancy " + << MinOccupancy << ".\n"); S.setTargetOccupancy(MinOccupancy); } @@ -540,12 +536,13 @@ void GCNScheduleDAGMILive::finalizeSchedule() { continue; } - DEBUG(dbgs() << "********** MI Scheduling **********\n"); - DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " " - << MBB->getName() << "\n From: " << *begin() << " To: "; - if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; - else dbgs() << "End"; - dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); + LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n"); + LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " " + << MBB->getName() << "\n From: " << *begin() + << " To: "; + if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; + else dbgs() << "End"; + dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); schedule(); diff --git a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp index 5e1ba6b506d..a6838875622 100644 --- a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp +++ b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp @@ -121,7 +121,7 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu, LaterInstCount = getCFAluSize(LatrCFAlu); unsigned CumuledInsts = RootInstCount + LaterInstCount; if (CumuledInsts >= TII->getMaxAlusPerClause()) { - DEBUG(dbgs() << "Excess inst counts\n"); + LLVM_DEBUG(dbgs() << "Excess inst counts\n"); return false; } if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) @@ -139,7 +139,7 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu, RootCFAlu.getOperand(KBank0Idx).getImm() || LatrCFAlu.getOperand(KBank0LineIdx).getImm() != RootCFAlu.getOperand(KBank0LineIdx).getImm())) { - DEBUG(dbgs() << "Wrong KC0\n"); + LLVM_DEBUG(dbgs() << "Wrong KC0\n"); return false; } // Is KCache Bank 1 compatible ? @@ -155,7 +155,7 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu, RootCFAlu.getOperand(KBank1Idx).getImm() || LatrCFAlu.getOperand(KBank1LineIdx).getImm() != RootCFAlu.getOperand(KBank1LineIdx).getImm())) { - DEBUG(dbgs() << "Wrong KC0\n"); + LLVM_DEBUG(dbgs() << "Wrong KC0\n"); return false; } if (LatrCFAlu.getOperand(Mode0Idx).getImm()) { diff --git a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index 0fbc254486d..b4ec8dfb343 100644 --- a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -531,7 +531,7 @@ public: for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) { - DEBUG(dbgs() << CfCount << ":"; I->dump();); + LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump();); FetchClauses.push_back(MakeFetchClause(MBB, I)); CfCount++; LastAlu.back() = nullptr; @@ -549,7 +549,8 @@ public: switch (MI->getOpcode()) { case AMDGPU::CF_ALU_PUSH_BEFORE: if (RequiresWorkAround) { - DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n"); + LLVM_DEBUG(dbgs() + << "Applying bug work-around for ALU_PUSH_BEFORE\n"); BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG)) .addImm(CfCount + 1) .addImm(1); @@ -562,7 +563,7 @@ public: case AMDGPU::CF_ALU: I = MI; AluClauses.push_back(MakeALUClause(MBB, I)); - DEBUG(dbgs() << CfCount << ":"; MI->dump();); + LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); CfCount++; break; case AMDGPU::WHILELOOP: { @@ -597,7 +598,7 @@ public: .addImm(0) .addImm(0); IfThenElseStack.push_back(MIb); - DEBUG(dbgs() << CfCount << ":"; MIb->dump();); + LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); MI->eraseFromParent(); CfCount++; break; @@ -610,7 +611,7 @@ public: getHWInstrDesc(CF_ELSE)) .addImm(0) .addImm(0); - DEBUG(dbgs() << CfCount << ":"; MIb->dump();); + LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); IfThenElseStack.push_back(MIb); MI->eraseFromParent(); CfCount++; @@ -626,7 +627,7 @@ public: .addImm(CfCount + 1) .addImm(1); (void)MIb; - DEBUG(dbgs() << CfCount << ":"; MIb->dump();); + LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); CfCount++; } @@ -673,7 +674,7 @@ public: } default: if (TII->isExport(MI->getOpcode())) { - DEBUG(dbgs() << CfCount << ":"; MI->dump();); + LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); CfCount++; } break; diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp index f8d062ef52d..4bb4c037a44 100644 --- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -78,7 +78,7 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { AllowSwitchFromAlu = true; } else { unsigned NeededWF = 62.5f / ALUFetchRationEstimate; - DEBUG( dbgs() << NeededWF << " approx. Wavefronts Required\n" ); + LLVM_DEBUG(dbgs() << NeededWF << " approx. Wavefronts Required\n"); // We assume the local GPR requirements to be "dominated" by the requirement // of the TEX clause (which consumes 128 bits regs) ; ALU inst before and // after TEX are indeed likely to consume or generate values from/for the @@ -124,26 +124,24 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { NextInstKind = IDOther; } - DEBUG( - if (SU) { - dbgs() << " ** Pick node **\n"; - SU->dump(DAG); - } else { - dbgs() << "NO NODE \n"; - for (unsigned i = 0; i < DAG->SUnits.size(); i++) { - const SUnit &S = DAG->SUnits[i]; - if (!S.isScheduled) - S.dump(DAG); - } - } - ); + LLVM_DEBUG(if (SU) { + dbgs() << " ** Pick node **\n"; + SU->dump(DAG); + } else { + dbgs() << "NO NODE \n"; + for (unsigned i = 0; i < DAG->SUnits.size(); i++) { + const SUnit &S = DAG->SUnits[i]; + if (!S.isScheduled) + S.dump(DAG); + } + }); return SU; } void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { if (NextInstKind != CurInstKind) { - DEBUG(dbgs() << "Instruction Type Switch\n"); + LLVM_DEBUG(dbgs() << "Instruction Type Switch\n"); if (NextInstKind != IDAlu) OccupedSlotsMask |= 31; CurEmitted = 0; @@ -172,8 +170,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { ++CurEmitted; } - - DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n"); + LLVM_DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n"); if (CurInstKind != IDFetch) { MoveUnits(Pending[IDFetch], Available[IDFetch]); @@ -190,11 +187,11 @@ isPhysicalRegCopy(MachineInstr *MI) { } void R600SchedStrategy::releaseTopNode(SUnit *SU) { - DEBUG(dbgs() << "Top Releasing ";SU->dump(DAG);); + LLVM_DEBUG(dbgs() << "Top Releasing "; SU->dump(DAG);); } void R600SchedStrategy::releaseBottomNode(SUnit *SU) { - DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG);); + LLVM_DEBUG(dbgs() << "Bottom Releasing "; SU->dump(DAG);); if (isPhysicalRegCopy(SU->getInstr())) { PhysicalRegCopy.push_back(SU); return; @@ -345,7 +342,7 @@ void R600SchedStrategy::LoadAlu() { } void R600SchedStrategy::PrepareNextSlot() { - DEBUG(dbgs() << "New Slot\n"); + LLVM_DEBUG(dbgs() << "New Slot\n"); assert (OccupedSlotsMask && "Slot wasn't filled"); OccupedSlotsMask = 0; // if (HwGen == R600Subtarget::NORTHERN_ISLANDS) diff --git a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp index 4a14d95f1cc..cb46855f475 100644 --- a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -228,20 +228,20 @@ MachineInstr *R600VectorRegMerger::RebuildVector( UpdatedUndef.erase(ChanPos); assert(!is_contained(UpdatedUndef, Chan) && "UpdatedUndef shouldn't contain Chan more than once!"); - DEBUG(dbgs() << " ->"; Tmp->dump();); + LLVM_DEBUG(dbgs() << " ->"; Tmp->dump();); (void)Tmp; SrcVec = DstReg; } MachineInstr *NewMI = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg).addReg(SrcVec); - DEBUG(dbgs() << " ->"; NewMI->dump();); + LLVM_DEBUG(dbgs() << " ->"; NewMI->dump();); - DEBUG(dbgs() << " Updating Swizzle:\n"); + LLVM_DEBUG(dbgs() << " Updating Swizzle:\n"); for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), E = MRI->use_instr_end(); It != E; ++It) { - DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->"); + LLVM_DEBUG(dbgs() << " "; (*It).dump(); dbgs() << " ->"); SwizzleInput(*It, RemapChan); - DEBUG((*It).dump()); + LLVM_DEBUG((*It).dump()); } RSI->Instr->eraseFromParent(); @@ -372,14 +372,14 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { if (!areAllUsesSwizzeable(Reg)) continue; - DEBUG({ + LLVM_DEBUG({ dbgs() << "Trying to optimize "; MI.dump(); }); RegSeqInfo CandidateRSI; std::vector<std::pair<unsigned, unsigned>> RemapChan; - DEBUG(dbgs() << "Using common slots...\n";); + LLVM_DEBUG(dbgs() << "Using common slots...\n";); if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) { // Remove CandidateRSI mapping RemoveMI(CandidateRSI.Instr); @@ -387,7 +387,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { trackRSI(RSI); continue; } - DEBUG(dbgs() << "Using free slots...\n";); + LLVM_DEBUG(dbgs() << "Using free slots...\n";); RemapChan.clear(); if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) { RemoveMI(CandidateRSI.Instr); diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp index 7340318d2d8..069e9dcb123 100644 --- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp @@ -236,7 +236,7 @@ public: if (ConsideredInstUsesAlreadyWrittenVectorElement && !TII->isVectorOnly(MI) && VLIW5) { isTransSlot = true; - DEBUG({ + LLVM_DEBUG({ dbgs() << "Considering as Trans Inst :"; MI.dump(); }); @@ -249,7 +249,7 @@ public: // Are the Constants limitations met ? CurrentPacketMIs.push_back(&MI); if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) { - DEBUG({ + LLVM_DEBUG({ dbgs() << "Couldn't pack :\n"; MI.dump(); dbgs() << "with the following packets :\n"; @@ -266,7 +266,7 @@ public: // Is there a BankSwizzle set that meet Read Port limitations ? if (!TII->fitsReadPortLimitations(CurrentPacketMIs, PV, BS, isTransSlot)) { - DEBUG({ + LLVM_DEBUG({ dbgs() << "Couldn't pack :\n"; MI.dump(); dbgs() << "with the following packets :\n"; diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp index 11fea5d6ee7..8ef4315e67f 100644 --- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -201,7 +201,7 @@ bool SIAnnotateControlFlow::isElse(PHINode *Phi) { // Erase "Phi" if it is not used any more void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) { if (RecursivelyDeleteDeadPHINode(Phi)) { - DEBUG(dbgs() << "Erased unused condition phi\n"); + LLVM_DEBUG(dbgs() << "Erased unused condition phi\n"); } } diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index e26bc99bd4b..033d08a42b6 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -513,9 +513,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, if (MDT.dominates(MI1, MI2)) { if (!intereferes(MI2, MI1)) { - DEBUG(dbgs() << "Erasing from " - << printMBBReference(*MI2->getParent()) << " " - << *MI2); + LLVM_DEBUG(dbgs() + << "Erasing from " + << printMBBReference(*MI2->getParent()) << " " << *MI2); MI2->eraseFromParent(); Defs.erase(I2++); Changed = true; @@ -523,9 +523,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, } } else if (MDT.dominates(MI2, MI1)) { if (!intereferes(MI1, MI2)) { - DEBUG(dbgs() << "Erasing from " - << printMBBReference(*MI1->getParent()) << " " - << *MI1); + LLVM_DEBUG(dbgs() + << "Erasing from " + << printMBBReference(*MI1->getParent()) << " " << *MI1); MI1->eraseFromParent(); Defs.erase(I1++); Changed = true; @@ -541,11 +541,12 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, MachineBasicBlock::iterator I = MBB->getFirstNonPHI(); if (!intereferes(MI1, I) && !intereferes(MI2, I)) { - DEBUG(dbgs() << "Erasing from " - << printMBBReference(*MI1->getParent()) << " " << *MI1 - << "and moving from " - << printMBBReference(*MI2->getParent()) << " to " - << printMBBReference(*I->getParent()) << " " << *MI2); + LLVM_DEBUG(dbgs() + << "Erasing from " + << printMBBReference(*MI1->getParent()) << " " << *MI1 + << "and moving from " + << printMBBReference(*MI2->getParent()) << " to " + << printMBBReference(*I->getParent()) << " " << *MI2); I->getParent()->splice(I, MI2->getParent(), MI2); MI1->eraseFromParent(); Defs.erase(I1++); @@ -633,7 +634,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { if (!predsHasDivergentTerminator(MBB0, TRI) && !predsHasDivergentTerminator(MBB1, TRI)) { - DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n'); + LLVM_DEBUG(dbgs() + << "Not fixing PHI for uniform branch: " << MI << '\n'); break; } } @@ -673,7 +675,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { SmallSet<unsigned, 8> Visited; if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) { - DEBUG(dbgs() << "Fixing PHI: " << MI); + LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI); TII->moveToVALU(MI); } break; @@ -685,7 +687,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { continue; } - DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI); + LLVM_DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI); TII->moveToVALU(MI); break; @@ -696,7 +698,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { Src1RC = MRI.getRegClass(MI.getOperand(2).getReg()); if (TRI->isSGPRClass(DstRC) && (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) { - DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI); + LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI); TII->moveToVALU(MI); } break; diff --git a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp index 7a3caf4db71..dec88084708 100644 --- a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp @@ -58,7 +58,7 @@ bool SIFixVGPRCopies::runOnMachineFunction(MachineFunction &MF) { if (TII->isVGPRCopy(MI) && !MI.readsRegister(AMDGPU::EXEC, TRI)) { MI.addOperand(MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); - DEBUG(dbgs() << "Add exec use to " << MI); + LLVM_DEBUG(dbgs() << "Add exec use to " << MI); Changed = true; } break; diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index e4f121368a4..d41d151492d 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -637,14 +637,14 @@ static bool tryFoldInst(const SIInstrInfo *TII, const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0); const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1); if (Src1->isIdenticalTo(*Src0)) { - DEBUG(dbgs() << "Folded " << *MI << " into "); + LLVM_DEBUG(dbgs() << "Folded " << *MI << " into "); int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); if (Src2Idx != -1) MI->RemoveOperand(Src2Idx); MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1)); mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false))); - DEBUG(dbgs() << *MI << '\n'); + LLVM_DEBUG(dbgs() << *MI << '\n'); return true; } } @@ -685,7 +685,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, // be folded due to multiple uses or operand constraints. if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) { - DEBUG(dbgs() << "Constant folded " << *UseMI <<'\n'); + LLVM_DEBUG(dbgs() << "Constant folded " << *UseMI << '\n'); // Some constant folding cases change the same immediate's use to a new // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user @@ -752,8 +752,9 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, // copies. MRI->clearKillFlags(Fold.OpToFold->getReg()); } - DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " << - static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n'); + LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " + << static_cast<int>(Fold.UseOpNo) << " of " + << *Fold.UseMI << '\n'); tryFoldInst(TII, Fold.UseMI); } else if (Fold.isCommuted()) { // Restoring instruction's original operand order if fold has failed. @@ -833,7 +834,8 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) { if (!DefClamp) return false; - DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def << '\n'); + LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def + << '\n'); // Clamp is applied after omod, so it is OK if omod is set. DefClamp->setImm(1); @@ -956,7 +958,7 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) { if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp)) return false; - DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n'); + LLVM_DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n'); DefOMod->setImm(OMod); MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg()); diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index e6aaaf94751..0e1c2bc3172 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -350,9 +350,7 @@ public: void setWaitcnt(MachineInstr *WaitcntIn) { LfWaitcnt = WaitcntIn; } MachineInstr *getWaitcnt() const { return LfWaitcnt; } - void print() { - DEBUG(dbgs() << " iteration " << IterCnt << '\n';); - } + void print() { LLVM_DEBUG(dbgs() << " iteration " << IterCnt << '\n';); } private: // s_waitcnt added at the end of loop footer to stablize wait scores @@ -515,7 +513,7 @@ void BlockWaitcntBrackets::setExpScore(const MachineInstr *MI, const MachineRegisterInfo *MRI, unsigned OpNo, int32_t Val) { RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo, false); - DEBUG({ + LLVM_DEBUG({ const MachineOperand &Opnd = MI->getOperand(OpNo); assert(TRI->isVGPR(*MRI, Opnd.getReg())); }); @@ -1206,8 +1204,9 @@ void SIInsertWaitcnts::generateWaitcntInstBefore( ScoreBracket = BlockWaitcntBracketsMap[TBB].get(); } ScoreBracket->setRevisitLoop(true); - DEBUG(dbgs() << "set-revisit: Block" - << ContainingLoop->getHeader()->getNumber() << '\n';); + LLVM_DEBUG(dbgs() + << "set-revisit: Block" + << ContainingLoop->getHeader()->getNumber() << '\n';); } } @@ -1242,26 +1241,29 @@ void SIInsertWaitcnts::generateWaitcntInstBefore( if (insertSWaitInst) { if (OldWaitcnt && OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT) { if (ForceEmitZeroWaitcnts) - DEBUG(dbgs() << "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n"); + LLVM_DEBUG( + dbgs() + << "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n"); if (IsForceEmitWaitcnt) - DEBUG(dbgs() << "Force emit a s_waitcnt due to debug counter\n"); + LLVM_DEBUG(dbgs() + << "Force emit a s_waitcnt due to debug counter\n"); OldWaitcnt->getOperand(0).setImm(Enc); if (!OldWaitcnt->getParent()) MI.getParent()->insert(MI, OldWaitcnt); - DEBUG(dbgs() << "updateWaitcntInBlock\n" - << "Old Instr: " << MI << '\n' - << "New Instr: " << *OldWaitcnt << '\n'); + LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n" + << "Old Instr: " << MI << '\n' + << "New Instr: " << *OldWaitcnt << '\n'); } else { auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT)) .addImm(Enc); TrackedWaitcntSet.insert(SWaitInst); - DEBUG(dbgs() << "insertWaitcntInBlock\n" - << "Old Instr: " << MI << '\n' - << "New Instr: " << *SWaitInst << '\n'); + LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n" + << "Old Instr: " << MI << '\n' + << "New Instr: " << *SWaitInst << '\n'); } } @@ -1670,7 +1672,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get(); - DEBUG({ + LLVM_DEBUG({ dbgs() << "*** Block" << Block.getNumber() << " ***"; ScoreBrackets->dump(); }); @@ -1731,7 +1733,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, ScoreBrackets->clearWaitcnt(); - DEBUG({ + LLVM_DEBUG({ Inst.print(dbgs()); ScoreBrackets->dump(); }); @@ -1771,7 +1773,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, if (ContainingLoop && isLoopBottom(ContainingLoop, &Block)) { LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get(); WaitcntData->print(); - DEBUG(dbgs() << '\n';); + LLVM_DEBUG(dbgs() << '\n';); // The iterative waitcnt insertion algorithm aims for optimal waitcnt // placement and doesn't always guarantee convergence for a loop. Each @@ -1811,7 +1813,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, } if (SWaitInst) { - DEBUG({ + LLVM_DEBUG({ SWaitInst->print(dbgs()); dbgs() << "\nAdjusted score board:"; ScoreBrackets->dump(); @@ -1896,8 +1898,8 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { if ((std::count(BlockWaitcntProcessedSet.begin(), BlockWaitcntProcessedSet.end(), &MBB) < Count)) { BlockWaitcntBracketsMap[&MBB]->setRevisitLoop(true); - DEBUG(dbgs() << "set-revisit: Block" - << ContainingLoop->getHeader()->getNumber() << '\n';); + LLVM_DEBUG(dbgs() << "set-revisit: Block" + << ContainingLoop->getHeader()->getNumber() << '\n';); } } @@ -1931,7 +1933,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { } LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get(); WaitcntData->incIterCnt(); - DEBUG(dbgs() << "revisit: Block" << EntryBB->getNumber() << '\n';); + LLVM_DEBUG(dbgs() << "revisit: Block" << EntryBB->getNumber() << '\n';); continue; } else { LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get(); diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index e5304818afb..1ebc45dc9ee 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -553,7 +553,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( CI.I->eraseFromParent(); CI.Paired->eraseFromParent(); - DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n'); + LLVM_DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n'); return Next; } @@ -631,7 +631,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( CI.I->eraseFromParent(); CI.Paired->eraseFromParent(); - DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n'); + LLVM_DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n'); return Next; } @@ -950,7 +950,7 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) { assert(MRI->isSSA() && "Must be run on SSA"); - DEBUG(dbgs() << "Running SILoadStoreOptimizer\n"); + LLVM_DEBUG(dbgs() << "Running SILoadStoreOptimizer\n"); bool Modified = false; diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp index 0fd4c6bfed9..86f81136bc5 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -1207,7 +1207,7 @@ void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVaria NextReservedID = 1; NextNonReservedID = DAGSize + 1; - DEBUG(dbgs() << "Coloring the graph\n"); + LLVM_DEBUG(dbgs() << "Coloring the graph\n"); if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped) colorHighLatenciesGroups(); @@ -1264,13 +1264,11 @@ void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVaria SIScheduleBlock *Block = CurrentBlocks[i]; Block->finalizeUnits(); } - DEBUG( - dbgs() << "Blocks created:\n\n"; - for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { - SIScheduleBlock *Block = CurrentBlocks[i]; - Block->printDebug(true); - } - ); + LLVM_DEBUG(dbgs() << "Blocks created:\n\n"; + for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { + SIScheduleBlock *Block = CurrentBlocks[i]; + Block->printDebug(true); + }); } // Two functions taken from Codegen/MachineScheduler.cpp @@ -1290,7 +1288,7 @@ void SIScheduleBlockCreator::topologicalSort() { unsigned DAGSize = CurrentBlocks.size(); std::vector<int> WorkList; - DEBUG(dbgs() << "Topological Sort\n"); + LLVM_DEBUG(dbgs() << "Topological Sort\n"); WorkList.reserve(DAGSize); TopDownIndex2Block.resize(DAGSize); @@ -1337,11 +1335,11 @@ void SIScheduleBlockCreator::topologicalSort() { void SIScheduleBlockCreator::scheduleInsideBlocks() { unsigned DAGSize = CurrentBlocks.size(); - DEBUG(dbgs() << "\nScheduling Blocks\n\n"); + LLVM_DEBUG(dbgs() << "\nScheduling Blocks\n\n"); // We do schedule a valid scheduling such that a Block corresponds // to a range of instructions. - DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n"); + LLVM_DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n"); for (unsigned i = 0, e = DAGSize; i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks[i]; Block->fastSchedule(); @@ -1395,7 +1393,7 @@ void SIScheduleBlockCreator::scheduleInsideBlocks() { Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr()); } - DEBUG(dbgs() << "Restoring MI Pos\n"); + LLVM_DEBUG(dbgs() << "Restoring MI Pos\n"); // Restore old ordering (which prevents a LIS->handleMove bug). for (unsigned i = PosOld.size(), e = 0; i != e; --i) { MachineBasicBlock::iterator POld = PosOld[i-1]; @@ -1409,12 +1407,10 @@ void SIScheduleBlockCreator::scheduleInsideBlocks() { } } - DEBUG( - for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { - SIScheduleBlock *Block = CurrentBlocks[i]; - Block->printDebug(true); - } - ); + LLVM_DEBUG(for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { + SIScheduleBlock *Block = CurrentBlocks[i]; + Block->printDebug(true); + }); } void SIScheduleBlockCreator::fillStats() { @@ -1565,13 +1561,10 @@ SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, blockScheduled(Block); } - DEBUG( - dbgs() << "Block Order:"; - for (SIScheduleBlock* Block : BlocksScheduled) { - dbgs() << ' ' << Block->getID(); - } - dbgs() << '\n'; - ); + LLVM_DEBUG(dbgs() << "Block Order:"; for (SIScheduleBlock *Block + : BlocksScheduled) { + dbgs() << ' ' << Block->getID(); + } dbgs() << '\n';); } bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand, @@ -1634,18 +1627,17 @@ SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() { maxVregUsage = VregCurrentUsage; if (SregCurrentUsage > maxSregUsage) maxSregUsage = SregCurrentUsage; - DEBUG( - dbgs() << "Picking New Blocks\n"; - dbgs() << "Available: "; - for (SIScheduleBlock* Block : ReadyBlocks) - dbgs() << Block->getID() << ' '; - dbgs() << "\nCurrent Live:\n"; - for (unsigned Reg : LiveRegs) - dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; - dbgs() << '\n'; - dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; - dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n'; - ); + LLVM_DEBUG(dbgs() << "Picking New Blocks\n"; dbgs() << "Available: "; + for (SIScheduleBlock *Block + : ReadyBlocks) dbgs() + << Block->getID() << ' '; + dbgs() << "\nCurrent Live:\n"; + for (unsigned Reg + : LiveRegs) dbgs() + << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; + dbgs() << '\n'; + dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; + dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';); Cand.Block = nullptr; for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(), @@ -1677,20 +1669,18 @@ SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() { if (TryCand.Reason != NoCand) { Cand.setBest(TryCand); Best = I; - DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' ' - << getReasonStr(Cand.Reason) << '\n'); + LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' ' + << getReasonStr(Cand.Reason) << '\n'); } } - DEBUG( - dbgs() << "Picking: " << Cand.Block->getID() << '\n'; - dbgs() << "Is a block with high latency instruction: " - << (Cand.IsHighLatency ? "yes\n" : "no\n"); - dbgs() << "Position of last high latency dependency: " - << Cand.LastPosHighLatParentScheduled << '\n'; - dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n'; - dbgs() << '\n'; - ); + LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n'; + dbgs() << "Is a block with high latency instruction: " + << (Cand.IsHighLatency ? "yes\n" : "no\n"); + dbgs() << "Position of last high latency dependency: " + << Cand.LastPosHighLatParentScheduled << '\n'; + dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n'; + dbgs() << '\n';); Block = Cand.Block; ReadyBlocks.erase(Best); @@ -1939,13 +1929,10 @@ void SIScheduleDAGMI::schedule() { SmallVector<SUnit*, 8> TopRoots, BotRoots; SIScheduleBlockResult Best, Temp; - DEBUG(dbgs() << "Preparing Scheduling\n"); + LLVM_DEBUG(dbgs() << "Preparing Scheduling\n"); buildDAGWithRegPressure(); - DEBUG( - for(SUnit& SU : SUnits) - SU.dumpAll(this) - ); + LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this)); topologicalSort(); findRootsAndBiasEdges(TopRoots, BotRoots); @@ -2047,15 +2034,15 @@ void SIScheduleDAGMI::schedule() scheduleMI(SU, true); - DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " - << *SU->getInstr()); + LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " + << *SU->getInstr()); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); placeDebugValues(); - DEBUG({ + LLVM_DEBUG({ dbgs() << "*** Final schedule for " << printMBBReference(*begin()->getParent()) << " ***\n"; dumpSchedule(); diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index ddf45bbccb9..b68df539771 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -243,11 +243,11 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec if (CopyToExecInst->getOperand(1).isKill() && isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) { - DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst); + LLVM_DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst); PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC); - DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n'); + LLVM_DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n'); CopyToExecInst->eraseFromParent(); } @@ -257,7 +257,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { if (isLiveOut(MBB, CopyToExec)) { // The copied register is live out and has a second use in another block. - DEBUG(dbgs() << "Exec copy source register is live out\n"); + LLVM_DEBUG(dbgs() << "Exec copy source register is live out\n"); continue; } @@ -269,7 +269,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { = std::next(CopyFromExecInst->getIterator()), JE = I->getIterator(); J != JE; ++J) { if (SaveExecInst && J->readsRegister(AMDGPU::EXEC, TRI)) { - DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n'); + LLVM_DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n'); // Make sure this is inserted after any VALU ops that may have been // scheduled in between. SaveExecInst = nullptr; @@ -280,8 +280,8 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { if (J->modifiesRegister(CopyToExec, TRI)) { if (SaveExecInst) { - DEBUG(dbgs() << "Multiple instructions modify " - << printReg(CopyToExec, TRI) << '\n'); + LLVM_DEBUG(dbgs() << "Multiple instructions modify " + << printReg(CopyToExec, TRI) << '\n'); SaveExecInst = nullptr; break; } @@ -292,10 +292,11 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { if (ReadsCopyFromExec) { SaveExecInst = &*J; - DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n'); + LLVM_DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n'); continue; } else { - DEBUG(dbgs() << "Instruction does not read exec copy: " << *J << '\n'); + LLVM_DEBUG(dbgs() + << "Instruction does not read exec copy: " << *J << '\n'); break; } } else if (ReadsCopyFromExec && !SaveExecInst) { @@ -307,8 +308,8 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { // spill %sgpr0_sgpr1 // %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1 // - DEBUG(dbgs() << "Found second use of save inst candidate: " - << *J << '\n'); + LLVM_DEBUG(dbgs() << "Found second use of save inst candidate: " << *J + << '\n'); break; } @@ -321,7 +322,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { if (!SaveExecInst) continue; - DEBUG(dbgs() << "Insert save exec op: " << *SaveExecInst << '\n'); + LLVM_DEBUG(dbgs() << "Insert save exec op: " << *SaveExecInst << '\n'); MachineOperand &Src0 = SaveExecInst->getOperand(1); MachineOperand &Src1 = SaveExecInst->getOperand(2); diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index b7bb80acb71..c9e3e5696f3 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -143,7 +143,8 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { I->hasUnmodeledSideEffects() || I->hasOrderedMemoryRef()) break; - DEBUG(dbgs() << "Removing no effect instruction: " << *I << '\n'); + LLVM_DEBUG(dbgs() + << "Removing no effect instruction: " << *I << '\n'); for (auto &Op : I->operands()) { if (Op.isReg()) @@ -193,7 +194,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { !getOrExecSource(*NextLead, *TII, MRI)) continue; - DEBUG(dbgs() << "Redundant EXEC = S_OR_B64 found: " << *Lead << '\n'); + LLVM_DEBUG(dbgs() << "Redundant EXEC = S_OR_B64 found: " << *Lead << '\n'); auto SaveExec = getOrExecSource(*Lead, *TII, MRI); unsigned SaveExecReg = getOrNonExecReg(*Lead, *TII); @@ -224,7 +225,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { break; } - DEBUG(dbgs() << "Redundant EXEC COPY: " << *SaveExec << '\n'); + LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *SaveExec << '\n'); } if (SafeToReplace) { diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 7a56b4d1bd6..6f9d7522872 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -846,7 +846,7 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) { for (MachineInstr &MI : MBB) { if (auto Operand = matchSDWAOperand(MI)) { - DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n'); + LLVM_DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n'); SDWAOperands[&MI] = std::move(Operand); ++NumSDWAPatternsFound; } @@ -901,7 +901,7 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI, bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands) { - DEBUG(dbgs() << "Convert instruction:" << MI); + LLVM_DEBUG(dbgs() << "Convert instruction:" << MI); // Convert to sdwa int SDWAOpcode; @@ -1050,7 +1050,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, // Apply all sdwa operand patterns. bool Converted = false; for (auto &Operand : SDWAOperands) { - DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand); + LLVM_DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand); // There should be no intesection between SDWA operands and potential MIs // e.g.: // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0 @@ -1071,7 +1071,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, return false; } - DEBUG(dbgs() << "\nInto:" << *SDWAInst << '\n'); + LLVM_DEBUG(dbgs() << "\nInto:" << *SDWAInst << '\n'); ++NumSDWAInstructionsPeepholed; MI.eraseFromParent(); diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 33fd5a30791..3c4c3baf799 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -495,7 +495,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { } // We can shrink this instruction - DEBUG(dbgs() << "Shrinking " << MI); + LLVM_DEBUG(dbgs() << "Shrinking " << MI); MachineInstrBuilder Inst32 = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32)); @@ -539,9 +539,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { MI.eraseFromParent(); foldImmediates(*Inst32, TII, MRI); - DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n'); - - + LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n'); } } return false; diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 89e5d56d1e0..7de132e0ed1 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -679,7 +679,8 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, if (!isEntry && BI.Needs == StateWQM && BI.OutNeeds != StateExact) return; - DEBUG(dbgs() << "\nProcessing block " << printMBBReference(MBB) << ":\n"); + LLVM_DEBUG(dbgs() << "\nProcessing block " << printMBBReference(MBB) + << ":\n"); unsigned SavedWQMReg = 0; unsigned SavedNonWWMReg = 0; @@ -882,7 +883,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { } } - DEBUG(printInfo()); + LLVM_DEBUG(printInfo()); lowerCopyInstrs(); |