Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInline.cpp                      |   4
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp                    |  81
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp      | 392
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp |   6
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp               |  47
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp         |  11
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp         |  10
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp              | 247
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNILPSched.cpp                       |  27
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp             |  65
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp                 |  51
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp                  |  77
-rw-r--r--  llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp               |   6
-rw-r--r--  llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp          |  15
-rw-r--r--  llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp              |  37
-rw-r--r--  llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp       |  16
-rw-r--r--  llvm/lib/Target/AMDGPU/R600Packetizer.cpp                    |   6
-rw-r--r--  llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp             |   2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp                   |  32
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp                   |   2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp                    |  16
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp                  |  44
-rw-r--r--  llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp              |   6
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp                |  99
-rw-r--r--  llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp             |  23
-rw-r--r--  llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp        |   7
-rw-r--r--  llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp                    |   8
-rw-r--r--  llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp              |   6
-rw-r--r--  llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp                   |   5
29 files changed, 649 insertions, 699 deletions
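
Note: every hunk below makes the same mechanical change, renaming the old DEBUG(...) debug-print macro to LLVM_DEBUG(...) from llvm/Support/Debug.h and re-wrapping the streamed expressions to fit the longer macro name. The sketch that follows is illustrative only and is not part of the patch; the DEBUG_TYPE string and the helper function are placeholders invented for this example.

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

// -debug-only=<name> matches this string; "amdgpu-example" is a placeholder
// for the sketch, not the DEBUG_TYPE of any pass touched by this patch.
#define DEBUG_TYPE "amdgpu-example"

// Hypothetical helper showing the before/after spelling of a debug print.
static void traceFold(const char *Before, const char *After) {
  // Old spelling, removed by this patch:
  //   DEBUG(dbgs() << "AMDIC: " << Before << " ---> " << After << "\n");
  // New spelling; compiles to nothing unless the build has assertions and
  // the -debug / -debug-only flags enable it at run time:
  LLVM_DEBUG(llvm::dbgs() << "AMDIC: " << Before << " ---> " << After
                          << "\n");
}
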
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
index ca77795ce10..35dd9eb0a47 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -161,8 +161,8 @@ static bool isWrapperOnlyCall(CallSite CS) {
return false;
}
if (isa<ReturnInst>(*std::next(I->getIterator()))) {
- DEBUG(dbgs() << " Wrapper only call detected: "
- << Callee->getName() << '\n');
+ LLVM_DEBUG(dbgs() << " Wrapper only call detected: "
+ << Callee->getName() << '\n');
return true;
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index dd45ced6ecc..7a7ed7a4f06 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -765,8 +765,7 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
ArrayRef<double> tmp(DVal);
nval = ConstantDataVector::get(context, tmp);
}
- DEBUG(errs() << "AMDIC: " << *CI
- << " ---> " << *nval << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
replaceCall(nval);
return true;
}
@@ -776,8 +775,7 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
for (int i = 0; i < sz; ++i) {
if (CF->isExactlyValue(ftbl[i].input)) {
Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
- DEBUG(errs() << "AMDIC: " << *CI
- << " ---> " << *nval << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
replaceCall(nval);
return true;
}
@@ -798,11 +796,11 @@ bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
AMDGPULibFunc nf = FInfo;
nf.setPrefix(AMDGPULibFunc::NATIVE);
if (Constant *FPExpr = getFunction(M, nf)) {
- DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
+ LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
CI->setCalledFunction(FPExpr);
- DEBUG(dbgs() << *CI << '\n');
+ LLVM_DEBUG(dbgs() << *CI << '\n');
return true;
}
@@ -820,8 +818,7 @@ bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
opr0,
"recip2div");
- DEBUG(errs() << "AMDIC: " << *CI
- << " ---> " << *nval << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
replaceCall(nval);
return true;
}
@@ -899,7 +896,7 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
// pow/powr/pown(x, 0) == 1
- DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
Constant *cnval = ConstantFP::get(eltType, 1.0);
if (getVecSize(FInfo) > 1) {
cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
@@ -909,23 +906,21 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
}
if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
// pow/powr/pown(x, 1.0) = x
- DEBUG(errs() << "AMDIC: " << *CI
- << " ---> " << *opr0 << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
replaceCall(opr0);
return true;
}
if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
// pow/powr/pown(x, 2.0) = x*x
- DEBUG(errs() << "AMDIC: " << *CI
- << " ---> " << *opr0 << " * " << *opr0 << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
+ << "\n");
Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
replaceCall(nval);
return true;
}
if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
// pow/powr/pown(x, -1.0) = 1.0/x
- DEBUG(errs() << "AMDIC: " << *CI
- << " ---> 1 / " << *opr0 << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
Constant *cnval = ConstantFP::get(eltType, 1.0);
if (getVecSize(FInfo) > 1) {
cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
@@ -942,8 +937,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
if (Constant *FPExpr = getFunction(M,
AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
: AMDGPULibFunc::EI_RSQRT, FInfo))) {
- DEBUG(errs() << "AMDIC: " << *CI << " ---> "
- << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
+ << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
: "__pow2rsqrt");
replaceCall(nval);
@@ -999,8 +994,9 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
}
nval = B.CreateFDiv(cnval, nval, "__1powprod");
}
- DEBUG(errs() << "AMDIC: " << *CI << " ---> "
- << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0 << ")\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
+ << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
+ << ")\n");
replaceCall(nval);
return true;
}
@@ -1137,8 +1133,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
nval = B.CreateBitCast(nval, opr0->getType());
}
- DEBUG(errs() << "AMDIC: " << *CI << " ---> "
- << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
+ << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
replaceCall(nval);
return true;
@@ -1155,8 +1151,7 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
}
int ci_opr1 = (int)CINT->getSExtValue();
if (ci_opr1 == 1) { // rootn(x, 1) = x
- DEBUG(errs() << "AMDIC: " << *CI
- << " ---> " << *opr0 << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
replaceCall(opr0);
return true;
}
@@ -1166,7 +1161,7 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
Module *M = CI->getModule();
if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT,
FInfo))) {
- DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
replaceCall(nval);
return true;
@@ -1175,13 +1170,13 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
Module *M = CI->getModule();
if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT,
FInfo))) {
- DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
replaceCall(nval);
return true;
}
} else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
- DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
opr0,
"__rootn2div");
@@ -1193,7 +1188,8 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
Module *M = CI->getModule();
if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT,
FInfo))) {
- DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0 << ")\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
+ << ")\n");
Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
replaceCall(nval);
return true;
@@ -1212,22 +1208,22 @@ bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
// fma/mad(a, b, c) = c if a=0 || b=0
- DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
replaceCall(opr2);
return true;
}
if (CF0 && CF0->isExactlyValue(1.0f)) {
// fma/mad(a, b, c) = b+c if a=1
- DEBUG(errs() << "AMDIC: " << *CI << " ---> "
- << *opr1 << " + " << *opr2 << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2
+ << "\n");
Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
replaceCall(nval);
return true;
}
if (CF1 && CF1->isExactlyValue(1.0f)) {
// fma/mad(a, b, c) = a+c if b=1
- DEBUG(errs() << "AMDIC: " << *CI << " ---> "
- << *opr0 << " + " << *opr2 << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2
+ << "\n");
Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
replaceCall(nval);
return true;
@@ -1235,8 +1231,8 @@ bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
if (CF->isZero()) {
// fma/mad(a, b, c) = a*b if c=0
- DEBUG(errs() << "AMDIC: " << *CI << " ---> "
- << *opr0 << " * " << *opr1 << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "
+ << *opr1 << "\n");
Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
replaceCall(nval);
return true;
@@ -1263,8 +1259,8 @@ bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
if (Constant *FPExpr = getNativeFunction(
CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
Value *opr0 = CI->getArgOperand(0);
- DEBUG(errs() << "AMDIC: " << *CI << " ---> "
- << "sqrt(" << *opr0 << ")\n");
+ LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
+ << "sqrt(" << *opr0 << ")\n");
Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
replaceCall(nval);
return true;
@@ -1355,8 +1351,8 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
P = B.CreateAddrSpaceCast(Alloc, PTy);
CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
- DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI
- << ") with " << *Call << "\n");
+ LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
+ << *Call << "\n");
if (!isSin) { // CI->cos, UI->sin
B.SetInsertPoint(&*ItOld);
@@ -1719,9 +1715,8 @@ bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
bool Changed = false;
auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DEBUG(dbgs() << "AMDIC: process function ";
- F.printAsOperand(dbgs(), false, F.getParent());
- dbgs() << '\n';);
+ LLVM_DEBUG(dbgs() << "AMDIC: process function ";
+ F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
if (!EnablePreLink)
Changed |= setFastFlags(F, Options);
@@ -1737,8 +1732,8 @@ bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
Function *Callee = CI->getCalledFunction();
if (Callee == 0) continue;
- DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
- dbgs().flush());
+ LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
+ dbgs().flush());
if(Simplifier.fold(CI, AA))
Changed = true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index b1d61fd3ede..612777981f9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -659,7 +659,7 @@ RegionMRT *MRT::buildMRT(MachineFunction &MF,
continue;
}
- DEBUG(dbgs() << "Visiting " << printMBBReference(*MBB) << "\n");
+ LLVM_DEBUG(dbgs() << "Visiting " << printMBBReference(*MBB) << "\n");
MBBMRT *NewMBB = new MBBMRT(MBB);
MachineRegion *Region = RegionInfo->getRegionFor(MBB);
@@ -696,18 +696,19 @@ void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
const TargetRegisterInfo *TRI,
PHILinearize &PHIInfo) {
if (TRI->isVirtualRegister(Reg)) {
- DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI)
+ << "\n");
// If this is a source register to a PHI we are chaining, it
// must be live out.
if (PHIInfo.isSource(Reg)) {
- DEBUG(dbgs() << "Add LiveOut (PHI): " << printReg(Reg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << "Add LiveOut (PHI): " << printReg(Reg, TRI) << "\n");
addLiveOut(Reg);
} else {
// If this is live out of the MBB
for (auto &UI : MRI->use_operands(Reg)) {
if (UI.getParent()->getParent() != MBB) {
- DEBUG(dbgs() << "Add LiveOut (MBB " << printMBBReference(*MBB)
- << "): " << printReg(Reg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << "Add LiveOut (MBB " << printMBBReference(*MBB)
+ << "): " << printReg(Reg, TRI) << "\n");
addLiveOut(Reg);
} else {
// If the use is in the same MBB we have to make sure
@@ -718,8 +719,8 @@ void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
MIE = UseInstr->getParent()->instr_end();
MII != MIE; ++MII) {
if ((&(*MII)) == DefInstr) {
- DEBUG(dbgs() << "Add LiveOut (Loop): " << printReg(Reg, TRI)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Add LiveOut (Loop): " << printReg(Reg, TRI)
+ << "\n");
addLiveOut(Reg);
}
}
@@ -735,11 +736,12 @@ void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
const TargetRegisterInfo *TRI,
PHILinearize &PHIInfo) {
if (TRI->isVirtualRegister(Reg)) {
- DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI)
+ << "\n");
for (auto &UI : MRI->use_operands(Reg)) {
if (!Region->contains(UI.getParent()->getParent())) {
- DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region
- << "): " << printReg(Reg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region
+ << "): " << printReg(Reg, TRI) << "\n");
addLiveOut(Reg);
}
}
@@ -750,8 +752,8 @@ void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB,
const MachineRegisterInfo *MRI,
const TargetRegisterInfo *TRI,
PHILinearize &PHIInfo) {
- DEBUG(dbgs() << "-Store Live Outs Begin (" << printMBBReference(*MBB)
- << ")-\n");
+ LLVM_DEBUG(dbgs() << "-Store Live Outs Begin (" << printMBBReference(*MBB)
+ << ")-\n");
for (auto &II : *MBB) {
for (auto &RI : II.defs()) {
storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo);
@@ -775,9 +777,10 @@ void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB,
for (int i = 0; i < numPreds; ++i) {
if (getPHIPred(PHI, i) == MBB) {
unsigned PHIReg = getPHISourceReg(PHI, i);
- DEBUG(dbgs() << "Add LiveOut (PhiSource " << printMBBReference(*MBB)
- << " -> " << printMBBReference(*(*SI))
- << "): " << printReg(PHIReg, TRI) << "\n");
+ LLVM_DEBUG(dbgs()
+ << "Add LiveOut (PhiSource " << printMBBReference(*MBB)
+ << " -> " << printMBBReference(*(*SI))
+ << "): " << printReg(PHIReg, TRI) << "\n");
addLiveOut(PHIReg);
}
}
@@ -785,7 +788,7 @@ void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB,
}
}
- DEBUG(dbgs() << "-Store Live Outs Endn-\n");
+ LLVM_DEBUG(dbgs() << "-Store Live Outs Endn-\n");
}
void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB,
@@ -845,8 +848,8 @@ void LinearizedRegion::storeLiveOuts(RegionMRT *Region,
for (int i = 0; i < numPreds; ++i) {
if (Region->contains(getPHIPred(PHI, i))) {
unsigned PHIReg = getPHISourceReg(PHI, i);
- DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region
- << "): " << printReg(PHIReg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region
+ << "): " << printReg(PHIReg, TRI) << "\n");
addLiveOut(PHIReg);
}
}
@@ -910,20 +913,21 @@ void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister,
bool IncludeLoopPHI) {
assert(Register != NewRegister && "Cannot replace a reg with itself");
- DEBUG(dbgs() << "Pepareing to replace register (region): "
- << printReg(Register, MRI->getTargetRegisterInfo()) << " with "
- << printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n");
+ LLVM_DEBUG(
+ dbgs() << "Pepareing to replace register (region): "
+ << printReg(Register, MRI->getTargetRegisterInfo()) << " with "
+ << printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n");
// If we are replacing outside, we also need to update the LiveOuts
if (ReplaceOutside &&
(isLiveOut(Register) || this->getParent()->isLiveOut(Register))) {
LinearizedRegion *Current = this;
while (Current != nullptr && Current->getEntry() != nullptr) {
- DEBUG(dbgs() << "Region before register replace\n");
- DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
+ LLVM_DEBUG(dbgs() << "Region before register replace\n");
+ LLVM_DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
Current->replaceLiveOut(Register, NewRegister);
- DEBUG(dbgs() << "Region after register replace\n");
- DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
+ LLVM_DEBUG(dbgs() << "Region after register replace\n");
+ LLVM_DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
Current = Current->getParent();
}
}
@@ -947,16 +951,16 @@ void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister,
if (ShouldReplace) {
if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) {
- DEBUG(dbgs() << "Trying to substitute physical register: "
- << printReg(NewRegister, MRI->getTargetRegisterInfo())
- << "\n");
+ LLVM_DEBUG(dbgs() << "Trying to substitute physical register: "
+ << printReg(NewRegister, MRI->getTargetRegisterInfo())
+ << "\n");
llvm_unreachable("Cannot substitute physical registers");
} else {
- DEBUG(dbgs() << "Replacing register (region): "
- << printReg(Register, MRI->getTargetRegisterInfo())
- << " with "
- << printReg(NewRegister, MRI->getTargetRegisterInfo())
- << "\n");
+ LLVM_DEBUG(dbgs() << "Replacing register (region): "
+ << printReg(Register, MRI->getTargetRegisterInfo())
+ << " with "
+ << printReg(NewRegister, MRI->getTargetRegisterInfo())
+ << "\n");
O.setReg(NewRegister);
}
}
@@ -1023,18 +1027,18 @@ void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) {
if (hasNoDef(Reg, MRI))
continue;
if (!MRI->hasOneDef(Reg)) {
- DEBUG(this->getEntry()->getParent()->dump());
- DEBUG(dbgs() << printReg(Reg, TRI) << "\n");
+ LLVM_DEBUG(this->getEntry()->getParent()->dump());
+ LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << "\n");
}
if (MRI->def_begin(Reg) == MRI->def_end()) {
- DEBUG(dbgs() << "Register "
- << printReg(Reg, MRI->getTargetRegisterInfo())
- << " has NO defs\n");
+ LLVM_DEBUG(dbgs() << "Register "
+ << printReg(Reg, MRI->getTargetRegisterInfo())
+ << " has NO defs\n");
} else if (!MRI->hasOneDef(Reg)) {
- DEBUG(dbgs() << "Register "
- << printReg(Reg, MRI->getTargetRegisterInfo())
- << " has multiple defs\n");
+ LLVM_DEBUG(dbgs() << "Register "
+ << printReg(Reg, MRI->getTargetRegisterInfo())
+ << " has multiple defs\n");
}
assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
@@ -1042,8 +1046,8 @@ void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) {
MachineOperand *UseOperand = &(RI);
bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB;
if (UseIsOutsideDefMBB && UseOperand->isKill()) {
- DEBUG(dbgs() << "Removing kill flag on register: "
- << printReg(Reg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << "Removing kill flag on register: "
+ << printReg(Reg, TRI) << "\n");
UseOperand->setIsKill(false);
}
}
@@ -1416,8 +1420,8 @@ void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) {
MachineInstr &Instr = *I;
if (Instr.isPHI()) {
unsigned PHIDestReg = getPHIDestReg(Instr);
- DEBUG(dbgs() << "Extractking killed phi:\n");
- DEBUG(Instr.dump());
+ LLVM_DEBUG(dbgs() << "Extractking killed phi:\n");
+ LLVM_DEBUG(Instr.dump());
PHIs.insert(&Instr);
PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc());
storePHILinearizationInfoDest(PHIDestReg, Instr);
@@ -1449,9 +1453,10 @@ bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
MachineBasicBlock *SourceMBB,
SmallVector<unsigned, 2> &PHIIndices,
unsigned *ReplaceReg) {
- DEBUG(dbgs() << "Shrink PHI: ");
- DEBUG(PHI.dump());
- DEBUG(dbgs() << " to " << printReg(getPHIDestReg(PHI), TRI) << " = PHI(");
+ LLVM_DEBUG(dbgs() << "Shrink PHI: ");
+ LLVM_DEBUG(PHI.dump());
+ LLVM_DEBUG(dbgs() << " to " << printReg(getPHIDestReg(PHI), TRI)
+ << " = PHI(");
bool Replaced = false;
unsigned NumInputs = getPHINumInputs(PHI);
@@ -1481,8 +1486,8 @@ bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
if (SourceMBB) {
MIB.addReg(CombinedSourceReg);
MIB.addMBB(SourceMBB);
- DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
- << printMBBReference(*SourceMBB));
+ LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
+ << printMBBReference(*SourceMBB));
}
for (unsigned i = 0; i < NumInputs; ++i) {
@@ -1493,10 +1498,10 @@ bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
MIB.addReg(SourceReg);
MIB.addMBB(SourcePred);
- DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
- << printMBBReference(*SourcePred));
+ LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
+ << printMBBReference(*SourcePred));
}
- DEBUG(dbgs() << ")\n");
+ LLVM_DEBUG(dbgs() << ")\n");
}
PHI.eraseFromParent();
return Replaced;
@@ -1505,9 +1510,10 @@ bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
void AMDGPUMachineCFGStructurizer::replacePHI(
MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge,
SmallVector<unsigned, 2> &PHIRegionIndices) {
- DEBUG(dbgs() << "Replace PHI: ");
- DEBUG(PHI.dump());
- DEBUG(dbgs() << " with " << printReg(getPHIDestReg(PHI), TRI) << " = PHI(");
+ LLVM_DEBUG(dbgs() << "Replace PHI: ");
+ LLVM_DEBUG(PHI.dump());
+ LLVM_DEBUG(dbgs() << " with " << printReg(getPHIDestReg(PHI), TRI)
+ << " = PHI(");
bool HasExternalEdge = false;
unsigned NumInputs = getPHINumInputs(PHI);
@@ -1524,8 +1530,8 @@ void AMDGPUMachineCFGStructurizer::replacePHI(
getPHIDestReg(PHI));
MIB.addReg(CombinedSourceReg);
MIB.addMBB(LastMerge);
- DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
- << printMBBReference(*LastMerge));
+ LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
+ << printMBBReference(*LastMerge));
for (unsigned i = 0; i < NumInputs; ++i) {
if (isPHIRegionIndex(PHIRegionIndices, i)) {
continue;
@@ -1534,10 +1540,10 @@ void AMDGPUMachineCFGStructurizer::replacePHI(
MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
MIB.addReg(SourceReg);
MIB.addMBB(SourcePred);
- DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
- << printMBBReference(*SourcePred));
+ LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
+ << printMBBReference(*SourcePred));
}
- DEBUG(dbgs() << ")\n");
+ LLVM_DEBUG(dbgs() << ")\n");
} else {
replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg);
}
@@ -1547,9 +1553,9 @@ void AMDGPUMachineCFGStructurizer::replacePHI(
void AMDGPUMachineCFGStructurizer::replaceEntryPHI(
MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB,
SmallVector<unsigned, 2> &PHIRegionIndices) {
- DEBUG(dbgs() << "Replace entry PHI: ");
- DEBUG(PHI.dump());
- DEBUG(dbgs() << " with ");
+ LLVM_DEBUG(dbgs() << "Replace entry PHI: ");
+ LLVM_DEBUG(PHI.dump());
+ LLVM_DEBUG(dbgs() << " with ");
unsigned NumInputs = getPHINumInputs(PHI);
unsigned NumNonRegionInputs = NumInputs;
@@ -1562,18 +1568,19 @@ void AMDGPUMachineCFGStructurizer::replaceEntryPHI(
if (NumNonRegionInputs == 0) {
auto DestReg = getPHIDestReg(PHI);
replaceRegisterWith(DestReg, CombinedSourceReg);
- DEBUG(dbgs() << " register " << printReg(CombinedSourceReg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << " register " << printReg(CombinedSourceReg, TRI)
+ << "\n");
PHI.eraseFromParent();
} else {
- DEBUG(dbgs() << printReg(getPHIDestReg(PHI), TRI) << " = PHI(");
+ LLVM_DEBUG(dbgs() << printReg(getPHIDestReg(PHI), TRI) << " = PHI(");
MachineBasicBlock *MBB = PHI.getParent();
MachineInstrBuilder MIB =
BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
getPHIDestReg(PHI));
MIB.addReg(CombinedSourceReg);
MIB.addMBB(IfMBB);
- DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
- << printMBBReference(*IfMBB));
+ LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
+ << printMBBReference(*IfMBB));
unsigned NumInputs = getPHINumInputs(PHI);
for (unsigned i = 0; i < NumInputs; ++i) {
if (isPHIRegionIndex(PHIRegionIndices, i)) {
@@ -1583,10 +1590,10 @@ void AMDGPUMachineCFGStructurizer::replaceEntryPHI(
MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
MIB.addReg(SourceReg);
MIB.addMBB(SourcePred);
- DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
- << printMBBReference(*SourcePred));
+ LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
+ << printMBBReference(*SourcePred));
}
- DEBUG(dbgs() << ")\n");
+ LLVM_DEBUG(dbgs() << ")\n");
PHI.eraseFromParent();
}
}
@@ -1608,8 +1615,9 @@ void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs(
}
}
- DEBUG(dbgs() << "Register " << printReg(Reg, TRI) << " is "
- << (IsDead ? "dead" : "alive") << " after PHI replace\n");
+ LLVM_DEBUG(dbgs() << "Register " << printReg(Reg, TRI) << " is "
+ << (IsDead ? "dead" : "alive")
+ << " after PHI replace\n");
if (IsDead) {
LRegion->removeLiveOut(Reg);
}
@@ -1683,8 +1691,8 @@ void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Regi
void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB,
MachineBasicBlock *Dest,
const DebugLoc &DL) {
- DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber()
- << " -> " << Dest->getNumber() << "\n");
+ LLVM_DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber()
+ << " -> " << Dest->getNumber() << "\n");
MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator();
bool HasTerminator = Terminator != MBB->instr_end();
if (HasTerminator) {
@@ -1733,7 +1741,8 @@ AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) {
MF->insert(ExitIter, LastMerge);
LastMerge->addSuccessor(Exit);
insertUnconditionalBranch(LastMerge, Exit);
- DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() << "\n");
+ LLVM_DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber()
+ << "\n");
}
return LastMerge;
}
@@ -1749,11 +1758,12 @@ void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB,
if (MergeBB->succ_begin() == MergeBB->succ_end()) {
return;
}
- DEBUG(dbgs() << "Merge PHI (" << printMBBReference(*MergeBB)
- << "): " << printReg(DestRegister, TRI) << " = PHI("
- << printReg(IfSourceRegister, TRI) << ", "
- << printMBBReference(*IfBB) << printReg(CodeSourceRegister, TRI)
- << ", " << printMBBReference(*CodeBB) << ")\n");
+ LLVM_DEBUG(dbgs() << "Merge PHI (" << printMBBReference(*MergeBB)
+ << "): " << printReg(DestRegister, TRI) << " = PHI("
+ << printReg(IfSourceRegister, TRI) << ", "
+ << printMBBReference(*IfBB)
+ << printReg(CodeSourceRegister, TRI) << ", "
+ << printMBBReference(*CodeBB) << ")\n");
const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin());
MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL,
TII->get(TargetOpcode::PHI), DestRegister);
@@ -1811,8 +1821,8 @@ static void removeExternalCFGEdges(MachineBasicBlock *StartMBB,
for (auto SI : Succs) {
std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI;
- DEBUG(dbgs() << "Removing edge: " << printMBBReference(*Edge.first)
- << " -> " << printMBBReference(*Edge.second) << "\n");
+ LLVM_DEBUG(dbgs() << "Removing edge: " << printMBBReference(*Edge.first)
+ << " -> " << printMBBReference(*Edge.second) << "\n");
Edge.first->removeSuccessor(Edge.second);
}
}
@@ -1845,13 +1855,13 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock(
IfBB->addSuccessor(MergeBB);
IfBB->addSuccessor(CodeBBStart);
- DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n");
+ LLVM_DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n");
// Ensure that the MergeBB is a successor of the CodeEndBB.
if (!CodeBBEnd->isSuccessor(MergeBB))
CodeBBEnd->addSuccessor(MergeBB);
- DEBUG(dbgs() << "Moved " << printMBBReference(*CodeBBStart) << " through "
- << printMBBReference(*CodeBBEnd) << "\n");
+ LLVM_DEBUG(dbgs() << "Moved " << printMBBReference(*CodeBBStart)
+ << " through " << printMBBReference(*CodeBBEnd) << "\n");
// If we have a single predecessor we can find a reasonable debug location
MachineBasicBlock *SinglePred =
@@ -1936,16 +1946,18 @@ void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *Co
MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) {
if (MRI->def_begin(Reg) == MRI->def_end()) {
- DEBUG(dbgs() << "Register " << printReg(Reg, MRI->getTargetRegisterInfo())
- << " has NO defs\n");
+ LLVM_DEBUG(dbgs() << "Register "
+ << printReg(Reg, MRI->getTargetRegisterInfo())
+ << " has NO defs\n");
} else if (!MRI->hasOneDef(Reg)) {
- DEBUG(dbgs() << "Register " << printReg(Reg, MRI->getTargetRegisterInfo())
- << " has multiple defs\n");
- DEBUG(dbgs() << "DEFS BEGIN:\n");
+ LLVM_DEBUG(dbgs() << "Register "
+ << printReg(Reg, MRI->getTargetRegisterInfo())
+ << " has multiple defs\n");
+ LLVM_DEBUG(dbgs() << "DEFS BEGIN:\n");
for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) {
- DEBUG(DI->getParent()->dump());
+ LLVM_DEBUG(DI->getParent()->dump());
}
- DEBUG(dbgs() << "DEFS END\n");
+ LLVM_DEBUG(dbgs() << "DEFS END\n");
}
assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
@@ -1987,7 +1999,7 @@ void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB,
const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg);
unsigned NextDestReg = MRI->createVirtualRegister(RegClass);
bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1;
- DEBUG(dbgs() << "Insert Chained PHI\n");
+ LLVM_DEBUG(dbgs() << "Insert Chained PHI\n");
insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg,
SourceReg, IsLastDef);
@@ -2023,16 +2035,16 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
}
for (auto LI : OldLiveOuts) {
- DEBUG(dbgs() << "LiveOut: " << printReg(LI, TRI));
+ LLVM_DEBUG(dbgs() << "LiveOut: " << printReg(LI, TRI));
if (!containsDef(CodeBB, InnerRegion, LI) ||
(!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) {
// If the register simly lives through the CodeBB, we don't have
// to rewrite anything since the register is not defined in this
// part of the code.
- DEBUG(dbgs() << "- through");
+ LLVM_DEBUG(dbgs() << "- through");
continue;
}
- DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "\n");
unsigned Reg = LI;
if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) {
// If the register is live out, we do want to create a phi,
@@ -2049,12 +2061,12 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
unsigned IfSourceReg = MRI->createVirtualRegister(RegClass);
// Create initializer, this value is never used, but is needed
// to satisfy SSA.
- DEBUG(dbgs() << "Initializer for reg: " << printReg(Reg) << "\n");
+ LLVM_DEBUG(dbgs() << "Initializer for reg: " << printReg(Reg) << "\n");
TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(),
IfSourceReg, 0);
InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI);
- DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n");
+ LLVM_DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n");
insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg,
IfSourceReg, Reg, true);
}
@@ -2064,22 +2076,22 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
// is a source block for a definition.
SmallVector<unsigned, 4> Sources;
if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) {
- DEBUG(dbgs() << "Inserting PHI Live Out from " << printMBBReference(*CodeBB)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Inserting PHI Live Out from "
+ << printMBBReference(*CodeBB) << "\n");
for (auto SI : Sources) {
unsigned DestReg;
PHIInfo.findDest(SI, CodeBB, DestReg);
insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI);
}
- DEBUG(dbgs() << "Insertion done.\n");
+ LLVM_DEBUG(dbgs() << "Insertion done.\n");
}
- DEBUG(PHIInfo.dump(MRI));
+ LLVM_DEBUG(PHIInfo.dump(MRI));
}
void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) {
- DEBUG(dbgs() << "Before PHI Prune\n");
- DEBUG(PHIInfo.dump(MRI));
+ LLVM_DEBUG(dbgs() << "Before PHI Prune\n");
+ LLVM_DEBUG(PHIInfo.dump(MRI));
SmallVector<std::tuple<unsigned, unsigned, MachineBasicBlock *>, 4>
ElimiatedSources;
for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
@@ -2119,8 +2131,8 @@ void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) {
PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo),
std::get<2>(SourceInfo));
}
- DEBUG(dbgs() << "After PHI Prune\n");
- DEBUG(PHIInfo.dump(MRI));
+ LLVM_DEBUG(dbgs() << "After PHI Prune\n");
+ LLVM_DEBUG(PHIInfo.dump(MRI));
}
void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion,
@@ -2128,8 +2140,8 @@ void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegio
MachineBasicBlock *Entry = CurrentRegion->getEntry();
MachineBasicBlock *Exit = CurrentRegion->getExit();
- DEBUG(dbgs() << "RegionExit: " << Exit->getNumber()
- << " Pred: " << (*(Entry->pred_begin()))->getNumber() << "\n");
+ LLVM_DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() << " Pred: "
+ << (*(Entry->pred_begin()))->getNumber() << "\n");
int NumSources = 0;
auto SE = PHIInfo.sources_end(DestReg);
@@ -2146,7 +2158,7 @@ void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegio
const DebugLoc &DL = Entry->findDebugLoc(Entry->begin());
MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL,
TII->get(TargetOpcode::PHI), DestReg);
- DEBUG(dbgs() << "Entry PHI " << printReg(DestReg, TRI) << " = PHI(");
+ LLVM_DEBUG(dbgs() << "Entry PHI " << printReg(DestReg, TRI) << " = PHI(");
unsigned CurrentBackedgeReg = 0;
@@ -2170,19 +2182,19 @@ void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegio
BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1));
BackedgePHI.addMBB((*SRI).second);
CurrentBackedgeReg = NewBackedgeReg;
- DEBUG(dbgs() << "Inserting backedge PHI: "
- << printReg(NewBackedgeReg, TRI) << " = PHI("
- << printReg(CurrentBackedgeReg, TRI) << ", "
- << printMBBReference(*getPHIPred(*PHIDefInstr, 0))
- << ", "
- << printReg(getPHISourceReg(*PHIDefInstr, 1), TRI)
- << ", " << printMBBReference(*(*SRI).second));
+ LLVM_DEBUG(dbgs()
+ << "Inserting backedge PHI: "
+ << printReg(NewBackedgeReg, TRI) << " = PHI("
+ << printReg(CurrentBackedgeReg, TRI) << ", "
+ << printMBBReference(*getPHIPred(*PHIDefInstr, 0)) << ", "
+ << printReg(getPHISourceReg(*PHIDefInstr, 1), TRI) << ", "
+ << printMBBReference(*(*SRI).second));
}
} else {
MIB.addReg(SourceReg);
MIB.addMBB((*SRI).second);
- DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
- << printMBBReference(*(*SRI).second) << ", ");
+ LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
+ << printMBBReference(*(*SRI).second) << ", ");
}
}
@@ -2190,16 +2202,16 @@ void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegio
if (CurrentBackedgeReg != 0) {
MIB.addReg(CurrentBackedgeReg);
MIB.addMBB(Exit);
- DEBUG(dbgs() << printReg(CurrentBackedgeReg, TRI) << ", "
- << printMBBReference(*Exit) << ")\n");
+ LLVM_DEBUG(dbgs() << printReg(CurrentBackedgeReg, TRI) << ", "
+ << printMBBReference(*Exit) << ")\n");
} else {
- DEBUG(dbgs() << ")\n");
+ LLVM_DEBUG(dbgs() << ")\n");
}
}
}
void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) {
- DEBUG(PHIInfo.dump(MRI));
+ LLVM_DEBUG(PHIInfo.dump(MRI));
for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
++DRI) {
@@ -2220,19 +2232,19 @@ void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register,
MachineOperand &O = *I;
++I;
if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) {
- DEBUG(dbgs() << "Trying to substitute physical register: "
- << printReg(NewRegister, MRI->getTargetRegisterInfo())
- << "\n");
+ LLVM_DEBUG(dbgs() << "Trying to substitute physical register: "
+ << printReg(NewRegister, MRI->getTargetRegisterInfo())
+ << "\n");
llvm_unreachable("Cannot substitute physical registers");
// We don't handle physical registers, but if we need to
// in the future This is how we do it:
// O.substPhysReg(NewRegister, *TRI);
} else {
- DEBUG(dbgs() << "Replacing register: "
- << printReg(Register, MRI->getTargetRegisterInfo())
- << " with "
- << printReg(NewRegister, MRI->getTargetRegisterInfo())
- << "\n");
+ LLVM_DEBUG(dbgs() << "Replacing register: "
+ << printReg(Register, MRI->getTargetRegisterInfo())
+ << " with "
+ << printReg(NewRegister, MRI->getTargetRegisterInfo())
+ << "\n");
O.setReg(NewRegister);
}
}
@@ -2240,20 +2252,20 @@ void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register,
getRegionMRT()->replaceLiveOutReg(Register, NewRegister);
- DEBUG(PHIInfo.dump(MRI));
+ LLVM_DEBUG(PHIInfo.dump(MRI));
}
void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) {
- DEBUG(dbgs() << "Resolve PHI Infos\n");
- DEBUG(PHIInfo.dump(MRI));
+ LLVM_DEBUG(dbgs() << "Resolve PHI Infos\n");
+ LLVM_DEBUG(PHIInfo.dump(MRI));
for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
++DRI) {
unsigned DestReg = *DRI;
- DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI) << "\n");
auto SRI = PHIInfo.sources_begin(DestReg);
unsigned SourceReg = (*SRI).first;
- DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI)
- << " SourceReg: " << printReg(SourceReg, TRI) << "\n");
+ LLVM_DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI)
+ << " SourceReg: " << printReg(SourceReg, TRI) << "\n");
assert(PHIInfo.sources_end(DestReg) == ++SRI &&
"More than one phi source in entry node");
@@ -2327,9 +2339,9 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
MachineOperand RegOp =
MachineOperand::CreateReg(Reg, false, false, true);
ArrayRef<MachineOperand> Cond(RegOp);
- DEBUG(dbgs() << "RegionExitReg: ");
- DEBUG(Cond[0].print(dbgs(), TRI));
- DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "RegionExitReg: ");
+ LLVM_DEBUG(Cond[0].print(dbgs(), TRI));
+ LLVM_DEBUG(dbgs() << "\n");
TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
Cond, DebugLoc());
RegionExit->addSuccessor(CurrentRegion->getEntry());
@@ -2339,12 +2351,12 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo);
InnerRegion.setParent(CurrentRegion);
- DEBUG(dbgs() << "Insert BB Select PHI (BB)\n");
+ LLVM_DEBUG(dbgs() << "Insert BB Select PHI (BB)\n");
insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
CodeBBSelectReg);
InnerRegion.addMBB(MergeBB);
- DEBUG(InnerRegion.print(dbgs(), TRI));
+ LLVM_DEBUG(InnerRegion.print(dbgs(), TRI));
rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion);
extractKilledPHIs(CodeBB);
if (IsRegionEntryBB) {
@@ -2385,16 +2397,16 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
CurrentRegion->getRegionMRT()->getEntry()->getNumber());
MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
ArrayRef<MachineOperand> Cond(RegOp);
- DEBUG(dbgs() << "RegionExitReg: ");
- DEBUG(Cond[0].print(dbgs(), TRI));
- DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "RegionExitReg: ");
+ LLVM_DEBUG(Cond[0].print(dbgs(), TRI));
+ LLVM_DEBUG(dbgs() << "\n");
TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
Cond, DebugLoc());
RegionExit->addSuccessor(IfBB);
}
}
CurrentRegion->addMBBs(InnerRegion);
- DEBUG(dbgs() << "Insert BB Select PHI (region)\n");
+ LLVM_DEBUG(dbgs() << "Insert BB Select PHI (region)\n");
insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
CodeBBSelectReg);
@@ -2440,15 +2452,16 @@ void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI,
MachineInstrBuilder MIB =
BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(),
TII->get(TargetOpcode::PHI), NewDestReg);
- DEBUG(dbgs() << "Split Entry PHI " << printReg(NewDestReg, TRI) << " = PHI(");
+ LLVM_DEBUG(dbgs() << "Split Entry PHI " << printReg(NewDestReg, TRI)
+ << " = PHI(");
MIB.addReg(PHISource);
MIB.addMBB(Entry);
- DEBUG(dbgs() << printReg(PHISource, TRI) << ", "
- << printMBBReference(*Entry));
+ LLVM_DEBUG(dbgs() << printReg(PHISource, TRI) << ", "
+ << printMBBReference(*Entry));
MIB.addReg(RegionSourceReg);
MIB.addMBB(RegionSourceMBB);
- DEBUG(dbgs() << " ," << printReg(RegionSourceReg, TRI) << ", "
- << printMBBReference(*RegionSourceMBB) << ")\n");
+ LLVM_DEBUG(dbgs() << " ," << printReg(RegionSourceReg, TRI) << ", "
+ << printMBBReference(*RegionSourceMBB) << ")\n");
}
void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry,
@@ -2481,7 +2494,8 @@ AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) {
LRegion->addMBB(NewExit);
LRegion->setExit(NewExit);
- DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber() << "\n");
+ LLVM_DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber()
+ << "\n");
// Replace any PHI Predecessors in the successor with NewExit
for (auto &II : *Succ) {
@@ -2529,9 +2543,9 @@ AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) {
MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI());
MachineBasicBlock *Exit = LRegion->getExit();
- DEBUG(dbgs() << "Split " << printMBBReference(*Entry) << " to "
- << printMBBReference(*Entry) << " -> "
- << printMBBReference(*EntrySucc) << "\n");
+ LLVM_DEBUG(dbgs() << "Split " << printMBBReference(*Entry) << " to "
+ << printMBBReference(*Entry) << " -> "
+ << printMBBReference(*EntrySucc) << "\n");
LRegion->addMBB(EntrySucc);
// Make the backedge go to Entry Succ
@@ -2622,21 +2636,21 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
rewriteRegionExitPHIs(Region, LastMerge, LRegion);
removeOldExitPreds(Region);
- DEBUG(PHIInfo.dump(MRI));
+ LLVM_DEBUG(PHIInfo.dump(MRI));
SetVector<MRT *> *Children = Region->getChildren();
- DEBUG(dbgs() << "===========If Region Start===============\n");
+ LLVM_DEBUG(dbgs() << "===========If Region Start===============\n");
if (LRegion->getHasLoop()) {
- DEBUG(dbgs() << "Has Backedge: Yes\n");
+ LLVM_DEBUG(dbgs() << "Has Backedge: Yes\n");
} else {
- DEBUG(dbgs() << "Has Backedge: No\n");
+ LLVM_DEBUG(dbgs() << "Has Backedge: No\n");
}
unsigned BBSelectRegIn;
unsigned BBSelectRegOut;
for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) {
- DEBUG(dbgs() << "CurrentRegion: \n");
- DEBUG(LRegion->print(dbgs(), TRI));
+ LLVM_DEBUG(dbgs() << "CurrentRegion: \n");
+ LLVM_DEBUG(LRegion->print(dbgs(), TRI));
auto CNI = CI;
++CNI;
@@ -2650,9 +2664,9 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
// We found the block is the exit of an inner region, we need
// to put it in the current linearized region.
- DEBUG(dbgs() << "Linearizing region: ");
- DEBUG(InnerLRegion->print(dbgs(), TRI));
- DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Linearizing region: ");
+ LLVM_DEBUG(InnerLRegion->print(dbgs(), TRI));
+ LLVM_DEBUG(dbgs() << "\n");
MachineBasicBlock *InnerEntry = InnerLRegion->getEntry();
if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) {
@@ -2670,10 +2684,10 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
BBSelectRegOut = Child->getBBSelectRegOut();
BBSelectRegIn = Child->getBBSelectRegIn();
- DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI)
- << "\n");
- DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI)
- << "\n");
+ LLVM_DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI)
+ << "\n");
+ LLVM_DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI)
+ << "\n");
MachineBasicBlock *IfEnd = CurrentMerge;
CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion,
@@ -2682,7 +2696,7 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
} else {
MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB();
- DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n");
+ LLVM_DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n");
if (MBB == getSingleExitNode(*(MBB->getParent()))) {
// If this is the exit block then we need to skip to the next.
@@ -2694,10 +2708,10 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
BBSelectRegOut = Child->getBBSelectRegOut();
BBSelectRegIn = Child->getBBSelectRegIn();
- DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI)
- << "\n");
- DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI)
- << "\n");
+ LLVM_DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI)
+ << "\n");
+ LLVM_DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI)
+ << "\n");
MachineBasicBlock *IfEnd = CurrentMerge;
// This is a basic block that is not part of an inner region, we
@@ -2708,7 +2722,7 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
}
- DEBUG(PHIInfo.dump(MRI));
+ LLVM_DEBUG(PHIInfo.dump(MRI));
}
}
@@ -2729,7 +2743,7 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
NewInReg, Region->getEntry()->getNumber());
// Need to be careful about updating the registers inside the region.
LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI);
- DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n");
+ LLVM_DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n");
insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc,
InnerSelectReg, NewInReg,
LRegion->getRegionMRT()->getInnerOutputRegister());
@@ -2741,11 +2755,11 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
TII->insertReturn(*LastMerge);
}
- DEBUG(Region->getEntry()->getParent()->dump());
- DEBUG(LRegion->print(dbgs(), TRI));
- DEBUG(PHIInfo.dump(MRI));
+ LLVM_DEBUG(Region->getEntry()->getParent()->dump());
+ LLVM_DEBUG(LRegion->print(dbgs(), TRI));
+ LLVM_DEBUG(PHIInfo.dump(MRI));
- DEBUG(dbgs() << "===========If Region End===============\n");
+ LLVM_DEBUG(dbgs() << "===========If Region End===============\n");
Region->setLinearizedRegion(LRegion);
return true;
@@ -2785,12 +2799,12 @@ bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region,
}
void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) {
- DEBUG(dbgs() << "Fallthrough Map:\n");
+ LLVM_DEBUG(dbgs() << "Fallthrough Map:\n");
for (auto &MBBI : MF) {
MachineBasicBlock *MBB = MBBI.getFallThrough();
if (MBB != nullptr) {
- DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> "
- << MBB->getNumber() << "\n");
+ LLVM_DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> "
+ << MBB->getNumber() << "\n");
}
FallthroughMap[&MBBI] = MBB;
}
@@ -2801,8 +2815,8 @@ void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region,
LinearizedRegion *LRegion = new LinearizedRegion();
if (SelectOut) {
LRegion->addLiveOut(SelectOut);
- DEBUG(dbgs() << "Add LiveOut (BBSelect): " << printReg(SelectOut, TRI)
- << "\n");
+ LLVM_DEBUG(dbgs() << "Add LiveOut (BBSelect): " << printReg(SelectOut, TRI)
+ << "\n");
}
LRegion->setRegionMRT(Region);
Region->setLinearizedRegion(LRegion);
@@ -2864,19 +2878,19 @@ bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) {
initFallthroughMap(MF);
checkRegOnlyPHIInputs(MF);
- DEBUG(dbgs() << "----STRUCTURIZER START----\n");
- DEBUG(MF.dump());
+ LLVM_DEBUG(dbgs() << "----STRUCTURIZER START----\n");
+ LLVM_DEBUG(MF.dump());
Regions = &(getAnalysis<MachineRegionInfoPass>().getRegionInfo());
- DEBUG(Regions->dump());
+ LLVM_DEBUG(Regions->dump());
RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI);
setRegionMRT(RTree);
initializeSelectRegisters(RTree, 0, MRI, TII);
- DEBUG(RTree->dump(TRI));
+ LLVM_DEBUG(RTree->dump(TRI));
bool result = structurizeRegions(RTree, true);
delete RTree;
- DEBUG(dbgs() << "----STRUCTURIZER END----\n");
+ LLVM_DEBUG(dbgs() << "----STRUCTURIZER END----\n");
initFallthroughMap(MF);
return result;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index 265104a8643..4ff6c6e01c6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -114,7 +114,7 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
M.getDataLayout());
F.setName(Name);
}
- DEBUG(dbgs() << "found enqueued kernel: " << F.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "found enqueued kernel: " << F.getName() << '\n');
auto RuntimeHandle = (F.getName() + ".runtime_handle").str();
auto T = Type::getInt8Ty(C)->getPointerTo(AMDGPUAS::GLOBAL_ADDRESS);
auto *GV = new GlobalVariable(
@@ -124,7 +124,7 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
/*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
AMDGPUAS::GLOBAL_ADDRESS,
/*IsExternallyInitialized=*/false);
- DEBUG(dbgs() << "runtime handle created: " << *GV << '\n');
+ LLVM_DEBUG(dbgs() << "runtime handle created: " << *GV << '\n');
for (auto U : F.users()) {
auto *UU = &*U;
@@ -145,7 +145,7 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
if (F->getCallingConv() != CallingConv::AMDGPU_KERNEL)
continue;
F->addFnAttr("calls-enqueue-kernel");
- DEBUG(dbgs() << "mark enqueue_kernel caller:" << F->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "mark enqueue_kernel caller:" << F->getName() << '\n');
}
return Changed;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 7d28c3c8259..9264cb11fb8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -343,13 +343,13 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
if (DisablePromoteAllocaToVector) {
- DEBUG(dbgs() << " Promotion alloca to vector is disabled\n");
+ LLVM_DEBUG(dbgs() << " Promotion alloca to vector is disabled\n");
return false;
}
ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());
- DEBUG(dbgs() << "Alloca candidate for vectorization\n");
+ LLVM_DEBUG(dbgs() << "Alloca candidate for vectorization\n");
// FIXME: There is no reason why we can't support larger arrays, we
// are just being conservative for now.
@@ -359,7 +359,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
AllocaTy->getNumElements() > 16 ||
AllocaTy->getNumElements() < 2 ||
!VectorType::isValidElementType(AllocaTy->getElementType())) {
- DEBUG(dbgs() << " Cannot convert type to vector\n");
+ LLVM_DEBUG(dbgs() << " Cannot convert type to vector\n");
return false;
}
@@ -380,7 +380,8 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
// If we can't compute a vector index from this GEP, then we can't
// promote this alloca to vector.
if (!Index) {
- DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n');
+ LLVM_DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP
+ << '\n');
return false;
}
@@ -395,8 +396,8 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
- DEBUG(dbgs() << " Converting alloca to vector "
- << *AllocaTy << " -> " << *VectorTy << '\n');
+ LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> "
+ << *VectorTy << '\n');
for (Value *V : WorkList) {
Instruction *Inst = cast<Instruction>(V);
@@ -485,7 +486,8 @@ bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(Value *BaseAlloca,
// important part is both must have the same address space at
// the end.
if (OtherObj != BaseAlloca) {
- DEBUG(dbgs() << "Found a binary instruction with another alloca object\n");
+ LLVM_DEBUG(
+ dbgs() << "Found a binary instruction with another alloca object\n");
return false;
}
@@ -607,8 +609,8 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
LocalMemLimit = 0;
- DEBUG(dbgs() << "Function has local memory argument. Promoting to "
- "local memory disabled.\n");
+ LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to "
+ "local memory disabled.\n");
return false;
}
}
@@ -677,13 +679,12 @@ bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
LocalMemLimit = MaxSizeWithWaveCount;
- DEBUG(
- dbgs() << F.getName() << " uses " << CurrentLocalMemUsage << " bytes of LDS\n"
- << " Rounding size to " << MaxSizeWithWaveCount
- << " with a maximum occupancy of " << MaxOccupancy << '\n'
- << " and " << (LocalMemLimit - CurrentLocalMemUsage)
- << " available for promotion\n"
- );
+ LLVM_DEBUG(dbgs() << F.getName() << " uses " << CurrentLocalMemUsage
+ << " bytes of LDS\n"
+ << " Rounding size to " << MaxSizeWithWaveCount
+ << " with a maximum occupancy of " << MaxOccupancy << '\n'
+ << " and " << (LocalMemLimit - CurrentLocalMemUsage)
+ << " available for promotion\n");
return true;
}
@@ -700,7 +701,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
// First try to replace the alloca with a vector
Type *AllocaTy = I.getAllocatedType();
- DEBUG(dbgs() << "Trying to promote " << I << '\n');
+ LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
if (tryPromoteAllocaToVector(&I, AS))
return true; // Promoted to vector.
@@ -716,7 +717,9 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
case CallingConv::SPIR_KERNEL:
break;
default:
- DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n");
+ LLVM_DEBUG(
+ dbgs()
+ << " promote alloca to LDS not supported with calling convention.\n");
return false;
}
@@ -745,8 +748,8 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
NewSize += AllocSize;
if (NewSize > LocalMemLimit) {
- DEBUG(dbgs() << " " << AllocSize
- << " bytes of local memory not available to promote\n");
+ LLVM_DEBUG(dbgs() << " " << AllocSize
+ << " bytes of local memory not available to promote\n");
return false;
}
@@ -755,11 +758,11 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
std::vector<Value*> WorkList;
if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
- DEBUG(dbgs() << " Do not know how to convert all uses\n");
+ LLVM_DEBUG(dbgs() << " Do not know how to convert all uses\n");
return false;
}
- DEBUG(dbgs() << "Promoting alloca to local memory\n");
+ LLVM_DEBUG(dbgs() << "Promoting alloca to local memory\n");
Function *F = I.getParent()->getParent();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index 83e56a9ab49..a861762a8c9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -249,8 +249,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
SmallVector<Argument *, 4> OutArgs;
for (Argument &Arg : F.args()) {
if (isOutArgumentCandidate(Arg)) {
- DEBUG(dbgs() << "Found possible out argument " << Arg
- << " in function " << F.getName() << '\n');
+ LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg
+ << " in function " << F.getName() << '\n');
OutArgs.push_back(&Arg);
}
}
@@ -310,7 +310,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
SI = dyn_cast<StoreInst>(Q.getInst());
if (SI) {
- DEBUG(dbgs() << "Found out argument store: " << *SI << '\n');
+ LLVM_DEBUG(dbgs() << "Found out argument store: " << *SI << '\n');
ReplaceableStores.emplace_back(RI, SI);
} else {
ThisReplaceable = false;
@@ -328,7 +328,8 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
if (llvm::find_if(ValVec,
[OutArg](const std::pair<Argument *, Value *> &Entry) {
return Entry.first == OutArg;}) != ValVec.end()) {
- DEBUG(dbgs() << "Saw multiple out arg stores" << *OutArg << '\n');
+ LLVM_DEBUG(dbgs()
+ << "Saw multiple out arg stores" << *OutArg << '\n');
// It is possible to see stores to the same argument multiple times,
// but we expect these would have been optimized out already.
ThisReplaceable = false;
@@ -358,7 +359,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
F.getFunctionType()->params(),
F.isVarArg());
- DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n');
+ LLVM_DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n');
Function *NewFunc = Function::Create(NewFuncTy, Function::PrivateLinkage,
F.getName() + ".body");
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 3ea7a82ea7a..ce17f027b52 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -124,8 +124,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
continue;
if (dependsOnLocalPhi(L, Br->getCondition())) {
UP.Threshold += UnrollThresholdIf;
- DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold
- << " for loop:\n" << *L << " due to " << *Br << '\n');
+ LLVM_DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold
+ << " for loop:\n"
+ << *L << " due to " << *Br << '\n');
if (UP.Threshold >= MaxBoost)
return;
}
@@ -201,8 +202,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// Don't use the maximum allowed value here as it will make some
// programs way too big.
UP.Threshold = Threshold;
- DEBUG(dbgs() << "Set unroll threshold " << Threshold << " for loop:\n"
- << *L << " due to " << *GEP << '\n');
+ LLVM_DEBUG(dbgs() << "Set unroll threshold " << Threshold
+ << " for loop:\n"
+ << *L << " due to " << *GEP << '\n');
if (UP.Threshold >= MaxBoost)
return;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 1793de63b59..cd1801a8244 100644
--- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -78,23 +78,18 @@ namespace {
//
//===----------------------------------------------------------------------===//
-#define SHOWNEWINSTR(i) \
- DEBUG(dbgs() << "New instr: " << *i << "\n");
-
-#define SHOWNEWBLK(b, msg) \
-DEBUG( \
- dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
- dbgs() << "\n"; \
-);
-
-#define SHOWBLK_DETAIL(b, msg) \
-DEBUG( \
- if (b) { \
- dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
- b->print(dbgs()); \
- dbgs() << "\n"; \
- } \
-);
+#define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n");
+
+#define SHOWNEWBLK(b, msg) \
+ LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ dbgs() << "\n";);
+
+#define SHOWBLK_DETAIL(b, msg) \
+ LLVM_DEBUG(if (b) { \
+ dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ b->print(dbgs()); \
+ dbgs() << "\n"; \
+ });
#define INVALIDSCCNUM -1
@@ -158,19 +153,19 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override {
TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
TRI = &TII->getRegisterInfo();
- DEBUG(MF.dump(););
+ LLVM_DEBUG(MF.dump(););
OrderedBlks.clear();
Visited.clear();
FuncRep = &MF;
MLI = &getAnalysis<MachineLoopInfo>();
- DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
+ LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
MDT = &getAnalysis<MachineDominatorTree>();
- DEBUG(MDT->print(dbgs(), (const Module*)nullptr););
+ LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr););
PDT = &getAnalysis<MachinePostDominatorTree>();
- DEBUG(PDT->print(dbgs()););
+ LLVM_DEBUG(PDT->print(dbgs()););
prepare();
run();
- DEBUG(MF.dump(););
+ LLVM_DEBUG(MF.dump(););
return true;
}
@@ -650,9 +645,8 @@ bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
if (MI)
assert(IsReturn);
else if (IsReturn)
- DEBUG(
- dbgs() << "BB" << MBB->getNumber()
- <<" is return block without RETURN instr\n";);
+ LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber()
+ << " is return block without RETURN instr\n";);
return IsReturn;
}
@@ -714,7 +708,7 @@ bool AMDGPUCFGStructurizer::prepare() {
//FIXME: if not reducible flow graph, make it so ???
- DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
+ LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
orderBlocks(FuncRep);
@@ -757,14 +751,14 @@ bool AMDGPUCFGStructurizer::prepare() {
bool AMDGPUCFGStructurizer::run() {
//Assume reducible CFG...
- DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
+ LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
#ifdef STRESSTEST
//Use the worse block ordering to test the algorithm.
ReverseVector(orderedBlks);
#endif
- DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
+ LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
int NumIter = 0;
bool Finish = false;
MachineBasicBlock *MBB;
@@ -774,10 +768,8 @@ bool AMDGPUCFGStructurizer::run() {
do {
++NumIter;
- DEBUG(
- dbgs() << "numIter = " << NumIter
- << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
- );
+ LLVM_DEBUG(dbgs() << "numIter = " << NumIter
+ << ", numRemaintedBlk = " << NumRemainedBlk << "\n";);
SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
OrderedBlks.begin();
@@ -799,10 +791,8 @@ bool AMDGPUCFGStructurizer::run() {
SccBeginMBB = MBB;
SccNumIter = 0;
SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
- DEBUG(
- dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
- dbgs() << "\n";
- );
+ LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
+ dbgs() << "\n";);
}
if (!isRetiredBlock(MBB))
@@ -817,20 +807,16 @@ bool AMDGPUCFGStructurizer::run() {
++SccNumIter;
int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
- DEBUG(
- dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
- << ", sccNumIter = " << SccNumIter;
- dbgs() << "doesn't make any progress\n";
- );
+ LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
+ << ", sccNumIter = " << SccNumIter;
+ dbgs() << "doesn't make any progress\n";);
ContNextScc = true;
} else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
SccNumBlk = sccRemainedNumBlk;
It = SccBeginIter;
ContNextScc = false;
- DEBUG(
- dbgs() << "repeat processing SCC" << getSCCNum(MBB)
- << "sccNumIter = " << SccNumIter << '\n';
- );
+ LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB)
+ << "sccNumIter = " << SccNumIter << '\n';);
} else {
// Finish the current scc.
ContNextScc = true;
@@ -848,9 +834,7 @@ bool AMDGPUCFGStructurizer::run() {
*GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
if (EntryMBB->succ_size() == 0) {
Finish = true;
- DEBUG(
- dbgs() << "Reduce to one block\n";
- );
+ LLVM_DEBUG(dbgs() << "Reduce to one block\n";);
} else {
int NewnumRemainedBlk
= countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
@@ -860,9 +844,7 @@ bool AMDGPUCFGStructurizer::run() {
NumRemainedBlk = NewnumRemainedBlk;
} else {
MakeProgress = false;
- DEBUG(
- dbgs() << "No progress\n";
- );
+ LLVM_DEBUG(dbgs() << "No progress\n";);
}
}
} while (!Finish && MakeProgress);
@@ -875,9 +857,7 @@ bool AMDGPUCFGStructurizer::run() {
It != E; ++It) {
if ((*It).second && (*It).second->IsRetired) {
assert(((*It).first)->getNumber() != -1);
- DEBUG(
- dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
- );
+ LLVM_DEBUG(dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";);
(*It).first->eraseFromParent(); //Remove from the parent Function.
}
delete (*It).second;
@@ -886,7 +866,7 @@ bool AMDGPUCFGStructurizer::run() {
LLInfoMap.clear();
if (!Finish) {
- DEBUG(FuncRep->viewCFG());
+ LLVM_DEBUG(FuncRep->viewCFG());
report_fatal_error("IRREDUCIBLE_CFG");
}
@@ -920,17 +900,13 @@ int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
int NumMatch = 0;
int CurMatch;
- DEBUG(
- dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
- );
+ LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";);
while ((CurMatch = patternMatchGroup(MBB)) > 0)
NumMatch += CurMatch;
- DEBUG(
- dbgs() << "End patternMatch BB" << MBB->getNumber()
- << ", numMatch = " << NumMatch << "\n";
- );
+ LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber()
+ << ", numMatch = " << NumMatch << "\n";);
return NumMatch;
}
@@ -1050,7 +1026,7 @@ int AMDGPUCFGStructurizer::loopendPatternMatch() {
for (MachineLoop *ExaminedLoop : NestedLoops) {
if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
continue;
- DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
+ LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
int NumBreak = mergeLoop(ExaminedLoop);
if (NumBreak == -1)
break;
@@ -1064,7 +1040,8 @@ int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
MBBVector ExitingMBBs;
LoopRep->getExitingBlocks(ExitingMBBs);
assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
- DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
+ LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size()
+ << " exiting blocks\n";);
// We assume a single ExitBlk
MBBVector ExitBlks;
LoopRep->getExitBlocks(ExitBlks);
@@ -1106,11 +1083,9 @@ bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
if (TheEntry) {
- DEBUG(
- dbgs() << "isLoopContBreakBlock yes src1 = BB"
- << Src1MBB->getNumber()
- << " src2 = BB" << Src2MBB->getNumber() << "\n";
- );
+ LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB"
+ << Src1MBB->getNumber() << " src2 = BB"
+ << Src2MBB->getNumber() << "\n";);
return true;
}
}
@@ -1122,9 +1097,8 @@ int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
if (Num == 0) {
- DEBUG(
- dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
- );
+ LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk"
+ << "\n";);
Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
}
return Num;
@@ -1138,22 +1112,16 @@ int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
//trueBlk could be the common post dominator
DownBlk = TrueMBB;
- DEBUG(
- dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
- << " true = BB" << TrueMBB->getNumber()
- << ", numSucc=" << TrueMBB->succ_size()
- << " false = BB" << FalseMBB->getNumber() << "\n";
- );
+ LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
+ << " true = BB" << TrueMBB->getNumber()
+ << ", numSucc=" << TrueMBB->succ_size() << " false = BB"
+ << FalseMBB->getNumber() << "\n";);
while (DownBlk) {
- DEBUG(
- dbgs() << "check down = BB" << DownBlk->getNumber();
- );
+ LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber(););
if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
- DEBUG(
- dbgs() << " working\n";
- );
+ LLVM_DEBUG(dbgs() << " working\n";);
Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
@@ -1166,9 +1134,7 @@ int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
break;
}
- DEBUG(
- dbgs() << " not working\n";
- );
+ LLVM_DEBUG(dbgs() << " not working\n";);
DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
} // walk down the postDomTree
@@ -1247,10 +1213,9 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
MigrateFalse = true;
- DEBUG(
- dbgs() << "before improveSimpleJumpintoIf: ";
- showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
- );
+ LLVM_DEBUG(
+ dbgs() << "before improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
// org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
//
@@ -1385,10 +1350,9 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
report_fatal_error("Extra register needed to handle CFG");
}
}
- DEBUG(
- dbgs() << "result from improveSimpleJumpintoIf: ";
- showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
- );
+ LLVM_DEBUG(
+ dbgs() << "result from improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
// update landBlk
*LandMBBPtr = LandBlk;
@@ -1398,10 +1362,8 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
MachineBasicBlock *SrcMBB) {
- DEBUG(
- dbgs() << "serialPattern BB" << DstMBB->getNumber()
- << " <= BB" << SrcMBB->getNumber() << "\n";
- );
+ LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB"
+ << SrcMBB->getNumber() << "\n";);
DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
DstMBB->removeSuccessor(SrcMBB, true);
@@ -1416,26 +1378,15 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
assert (TrueMBB);
- DEBUG(
- dbgs() << "ifPattern BB" << MBB->getNumber();
- dbgs() << "{ ";
- if (TrueMBB) {
- dbgs() << "BB" << TrueMBB->getNumber();
- }
- dbgs() << " } else ";
- dbgs() << "{ ";
- if (FalseMBB) {
- dbgs() << "BB" << FalseMBB->getNumber();
- }
- dbgs() << " }\n ";
- dbgs() << "landBlock: ";
- if (!LandMBB) {
- dbgs() << "NULL";
- } else {
- dbgs() << "BB" << LandMBB->getNumber();
- }
- dbgs() << "\n";
- );
+ LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{ ";
+ if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs()
+ << " } else ";
+ dbgs() << "{ "; if (FalseMBB) {
+ dbgs() << "BB" << FalseMBB->getNumber();
+ } dbgs() << " }\n ";
+ dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else {
+ dbgs() << "BB" << LandMBB->getNumber();
+ } dbgs() << "\n";);
int OldOpcode = BranchMI->getOpcode();
DebugLoc BranchDL = BranchMI->getDebugLoc();
@@ -1481,8 +1432,8 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
MachineBasicBlock *LandMBB) {
- DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
- << " land = BB" << LandMBB->getNumber() << "\n";);
+ LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
+ << " land = BB" << LandMBB->getNumber() << "\n";);
insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
@@ -1491,8 +1442,9 @@ void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
MachineBasicBlock *LandMBB) {
- DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
- << " land = BB" << LandMBB->getNumber() << "\n";);
+ LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB"
+ << ExitingMBB->getNumber() << " land = BB"
+ << LandMBB->getNumber() << "\n";);
MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
assert(BranchMI && isCondBranch(BranchMI));
DebugLoc DL = BranchMI->getDebugLoc();
@@ -1511,9 +1463,9 @@ void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
MachineBasicBlock *ContMBB) {
- DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
- << ContingMBB->getNumber()
- << ", cont = BB" << ContMBB->getNumber() << "\n";);
+ LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
+ << ContingMBB->getNumber() << ", cont = BB"
+ << ContMBB->getNumber() << "\n";);
MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
if (MI) {
@@ -1587,10 +1539,9 @@ AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
numClonedInstr += MBB->size();
- DEBUG(
- dbgs() << "Cloned block: " << "BB"
- << MBB->getNumber() << "size " << MBB->size() << "\n";
- );
+ LLVM_DEBUG(dbgs() << "Cloned block: "
+ << "BB" << MBB->getNumber() << "size " << MBB->size()
+ << "\n";);
SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
@@ -1603,26 +1554,22 @@ void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
//look for the input branchinstr, not the AMDGPU branchinstr
MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
if (!BranchMI) {
- DEBUG(
- dbgs() << "migrateInstruction don't see branch instr\n";
- );
+ LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";);
SpliceEnd = SrcMBB->end();
} else {
- DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
+ LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
SpliceEnd = BranchMI;
}
- DEBUG(
- dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
- << "srcSize = " << SrcMBB->size() << "\n";
- );
+ LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = "
+ << DstMBB->size() << "srcSize = " << SrcMBB->size()
+ << "\n";);
//splice insert before insertPos
DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
- DEBUG(
- dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
- << "srcSize = " << SrcMBB->size() << '\n';
- );
+ LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = "
+ << DstMBB->size() << "srcSize = " << SrcMBB->size()
+ << '\n';);
}
MachineBasicBlock *
@@ -1640,7 +1587,7 @@ AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
FuncRep->push_back(DummyExitBlk); //insert to function
SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
- DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
+ LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext();
Ctx.emitError("Extra register needed to handle CFG");
return nullptr;
@@ -1653,7 +1600,7 @@ void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
// test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
while ((BranchMI = getLoopendBlockBranchInstr(MBB))
&& isUncondBranch(BranchMI)) {
- DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
+ LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
BranchMI->eraseFromParent();
}
}
@@ -1669,7 +1616,7 @@ void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
assert(BranchMI && isCondBranch(BranchMI));
- DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
+ LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
BranchMI->eraseFromParent();
SHOWNEWBLK(MBB1, "Removing redundant successor");
MBB->removeSuccessor(MBB1, true);
@@ -1688,10 +1635,8 @@ void AMDGPUCFGStructurizer::addDummyExitBlock(
if (MI)
MI->eraseFromParent();
MBB->addSuccessor(DummyExitBlk);
- DEBUG(
- dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
- << " successors\n";
- );
+ LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
+ << " successors\n";);
}
SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
}
@@ -1710,9 +1655,7 @@ void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
}
void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
- DEBUG(
- dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
- );
+ LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";);
BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
diff --git a/llvm/lib/Target/AMDGPU/GCNILPSched.cpp b/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
index ba8211b189c..651091d4413 100644
--- a/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNILPSched.cpp
@@ -149,9 +149,9 @@ static int BUCompareLatency(const SUnit *left, const SUnit *right) {
int LDepth = left->getDepth();
int RDepth = right->getDepth();
if (LDepth != RDepth) {
- DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
- << ") depth " << LDepth << " vs SU (" << right->NodeNum
- << ") depth " << RDepth << "\n");
+ LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
return LDepth < RDepth ? 1 : -1;
}
if (left->Latency != right->Latency)
@@ -169,9 +169,9 @@ const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
if (!DisableSchedCriticalPath) {
int spread = (int)left->getDepth() - (int)right->getDepth();
if (std::abs(spread) > MaxReorderWindow) {
- DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
- << left->getDepth() << " != SU(" << right->NodeNum << "): "
- << right->getDepth() << "\n");
+ LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
+ << left->getDepth() << " != SU(" << right->NodeNum
+ << "): " << right->getDepth() << "\n");
return left->getDepth() < right->getDepth() ? right : left;
}
}
@@ -324,19 +324,18 @@ GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
if (AvailQueue.empty())
break;
- DEBUG(
- dbgs() << "\n=== Picking candidate\n"
- "Ready queue:";
- for (auto &C : AvailQueue)
- dbgs() << ' ' << C.SU->NodeNum;
- dbgs() << '\n';
- );
+ LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
+ "Ready queue:";
+ for (auto &C
+ : AvailQueue) dbgs()
+ << ' ' << C.SU->NodeNum;
+ dbgs() << '\n';);
auto C = pickCandidate();
assert(C);
AvailQueue.remove(*C);
auto SU = C->SU;
- DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
advanceToCycle(SU->getHeight());
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 182ce1e4f63..7f0d80f3d77 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -200,8 +200,8 @@ public:
void schedule() {
assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
- DEBUG(dbgs() << "\nScheduling ";
- printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2));
+ LLVM_DEBUG(dbgs() << "\nScheduling ";
+ printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2));
Sch.BaseClass::schedule();
    // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
@@ -311,14 +311,13 @@ void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden
void GCNIterativeScheduler::schedule() { // overridden
// do nothing
- DEBUG(
- printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
- if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
- dbgs() << "Max RP: ";
- Regions.back()->MaxPressure.print(dbgs(), &MF.getSubtarget<SISubtarget>());
- }
- dbgs() << '\n';
- );
+ LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
+ if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
+ dbgs() << "Max RP: ";
+ Regions.back()->MaxPressure.print(
+ dbgs(), &MF.getSubtarget<SISubtarget>());
+ } dbgs()
+ << '\n';);
}
void GCNIterativeScheduler::finalizeSchedule() { // overridden
@@ -453,22 +452,22 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
// TODO: assert Regions are sorted descending by pressure
const auto &ST = MF.getSubtarget<SISubtarget>();
const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
- DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
- << ", current = " << Occ << '\n');
+ LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
+ << ", current = " << Occ << '\n');
auto NewOcc = TargetOcc;
for (auto R : Regions) {
if (R->MaxPressure.getOccupancy(ST) >= NewOcc)
break;
- DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
- printLivenessInfo(dbgs(), R->Begin, R->End, LIS));
+ LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
+ printLivenessInfo(dbgs(), R->Begin, R->End, LIS));
BuildDAG DAG(*R, *this);
const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
const auto MaxRP = getSchedulePressure(*R, MinSchedule);
- DEBUG(dbgs() << "Occupancy improvement attempt:\n";
- printSchedRP(dbgs(), R->MaxPressure, MaxRP));
+ LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
+ printSchedRP(dbgs(), R->MaxPressure, MaxRP));
NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST));
if (NewOcc <= Occ)
@@ -476,8 +475,8 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
setBestSchedule(*R, MinSchedule, MaxRP);
}
- DEBUG(dbgs() << "New occupancy = " << NewOcc
- << ", prev occupancy = " << Occ << '\n');
+ LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc
+ << ", prev occupancy = " << Occ << '\n');
return std::max(NewOcc, Occ);
}
@@ -497,8 +496,9 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
const int NumPasses = Occ < TgtOcc ? 2 : 1;
TgtOcc = std::min(Occ, TgtOcc);
- DEBUG(dbgs() << "Scheduling using default scheduler, "
- "target occupancy = " << TgtOcc << '\n');
+ LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
+ "target occupancy = "
+ << TgtOcc << '\n');
GCNMaxOccupancySchedStrategy LStrgy(Context);
for (int I = 0; I < NumPasses; ++I) {
@@ -510,16 +510,16 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
Ovr.schedule();
const auto RP = getRegionPressure(*R);
- DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
+ LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
if (RP.getOccupancy(ST) < TgtOcc) {
- DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
+ LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
if (R->BestSchedule.get() &&
R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
- DEBUG(dbgs() << ", scheduling minimal register\n");
+ LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
scheduleBest(*R);
} else {
- DEBUG(dbgs() << ", restoring\n");
+ LLVM_DEBUG(dbgs() << ", restoring\n");
Ovr.restoreOrder();
assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc);
}
@@ -545,7 +545,7 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) {
const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
const auto RP = getSchedulePressure(*R, MinSchedule);
- DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) {
+ LLVM_DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) {
dbgs() << "\nWarning: Pressure becomes worse after minreg!";
printSchedRP(dbgs(), R->MaxPressure, RP);
});
@@ -554,7 +554,7 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) {
break;
scheduleRegion(*R, MinSchedule, RP);
- DEBUG(printSchedResult(dbgs(), R, RP));
+ LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
MaxPressure = RP;
}
@@ -577,26 +577,27 @@ void GCNIterativeScheduler::scheduleILP(
Occ = tryMaximizeOccupancy(TgtOcc);
TgtOcc = std::min(Occ, TgtOcc);
- DEBUG(dbgs() << "Scheduling using default scheduler, "
- "target occupancy = " << TgtOcc << '\n');
+ LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
+ "target occupancy = "
+ << TgtOcc << '\n');
for (auto R : Regions) {
BuildDAG DAG(*R, *this);
const auto ILPSchedule = makeGCNILPScheduler(DAG.getBottomRoots(), *this);
const auto RP = getSchedulePressure(*R, ILPSchedule);
- DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
+ LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
if (RP.getOccupancy(ST) < TgtOcc) {
- DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
+ LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
if (R->BestSchedule.get() &&
R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
- DEBUG(dbgs() << ", scheduling minimal register\n");
+ LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
scheduleBest(*R);
}
} else {
scheduleRegion(*R, ILPSchedule, RP);
- DEBUG(printSchedResult(dbgs(), R, RP));
+ LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
index 9904b5f0f4b..192d534bb9c 100644
--- a/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
@@ -142,35 +142,38 @@ GCNMinRegScheduler::Candidate* GCNMinRegScheduler::pickCandidate() {
unsigned Num = RQ.size();
if (Num == 1) break;
- DEBUG(dbgs() << "\nSelecting max priority candidates among " << Num << '\n');
+ LLVM_DEBUG(dbgs() << "\nSelecting max priority candidates among " << Num
+ << '\n');
Num = findMax(Num, [=](const Candidate &C) { return C.Priority; });
if (Num == 1) break;
- DEBUG(dbgs() << "\nSelecting min non-ready producing candidate among "
- << Num << '\n');
+ LLVM_DEBUG(dbgs() << "\nSelecting min non-ready producing candidate among "
+ << Num << '\n');
Num = findMax(Num, [=](const Candidate &C) {
auto SU = C.SU;
int Res = getNotReadySuccessors(SU);
- DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would left non-ready "
- << Res << " successors, metric = " << -Res << '\n');
+ LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would left non-ready "
+ << Res << " successors, metric = " << -Res << '\n');
return -Res;
});
if (Num == 1) break;
- DEBUG(dbgs() << "\nSelecting most producing candidate among "
- << Num << '\n');
+ LLVM_DEBUG(dbgs() << "\nSelecting most producing candidate among " << Num
+ << '\n');
Num = findMax(Num, [=](const Candidate &C) {
auto SU = C.SU;
auto Res = getReadySuccessors(SU);
- DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would make ready "
- << Res << " successors, metric = " << Res << '\n');
+ LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would make ready " << Res
+ << " successors, metric = " << Res << '\n');
return Res;
});
if (Num == 1) break;
Num = Num ? Num : RQ.size();
- DEBUG(dbgs() << "\nCan't find best candidate, selecting in program order among "
- << Num << '\n');
+ LLVM_DEBUG(
+ dbgs()
+ << "\nCan't find best candidate, selecting in program order among "
+ << Num << '\n');
Num = findMax(Num, [=](const Candidate &C) { return -(int64_t)C.SU->NodeNum; });
assert(Num == 1);
} while (false);
@@ -202,17 +205,17 @@ void GCNMinRegScheduler::bumpPredsPriority(const SUnit *SchedSU, int Priority) {
Worklist.push_back(P.getSUnit());
}
}
- DEBUG(dbgs() << "Make the predecessors of SU(" << SchedSU->NodeNum
- << ")'s non-ready successors of " << Priority
- << " priority in ready queue: ");
+ LLVM_DEBUG(dbgs() << "Make the predecessors of SU(" << SchedSU->NodeNum
+ << ")'s non-ready successors of " << Priority
+ << " priority in ready queue: ");
const auto SetEnd = Set.end();
for (auto &C : RQ) {
if (Set.find(C.SU) != SetEnd) {
C.Priority = Priority;
- DEBUG(dbgs() << " SU(" << C.SU->NodeNum << ')');
+ LLVM_DEBUG(dbgs() << " SU(" << C.SU->NodeNum << ')');
}
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
}
void GCNMinRegScheduler::releaseSuccessors(const SUnit* SU, int Priority) {
@@ -243,19 +246,19 @@ GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots,
releaseSuccessors(&DAG.EntrySU, StepNo);
while (!RQ.empty()) {
- DEBUG(
- dbgs() << "\n=== Picking candidate, Step = " << StepNo << "\n"
- "Ready queue:";
- for (auto &C : RQ)
- dbgs() << ' ' << C.SU->NodeNum << "(P" << C.Priority << ')';
- dbgs() << '\n';
- );
+ LLVM_DEBUG(dbgs() << "\n=== Picking candidate, Step = " << StepNo
+ << "\n"
+ "Ready queue:";
+ for (auto &C
+ : RQ) dbgs()
+ << ' ' << C.SU->NodeNum << "(P" << C.Priority << ')';
+ dbgs() << '\n';);
auto C = pickCandidate();
assert(C);
RQ.remove(*C);
auto SU = C->SU;
- DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
releaseSuccessors(SU, StepNo);
Schedule.push_back(SU);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 12305446fa4..c0a6765df34 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -200,34 +200,30 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
// See if BotCand is still valid (because we previously scheduled from Top).
- DEBUG(dbgs() << "Picking from Bot:\n");
+ LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
if (!BotCand.isValid() || BotCand.SU->isScheduled ||
BotCand.Policy != BotPolicy) {
BotCand.reset(CandPolicy());
pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
assert(BotCand.Reason != NoCand && "failed to find the first candidate");
} else {
- DEBUG(traceCandidate(BotCand));
+ LLVM_DEBUG(traceCandidate(BotCand));
}
// Check if the top Q has a better candidate.
- DEBUG(dbgs() << "Picking from Top:\n");
+ LLVM_DEBUG(dbgs() << "Picking from Top:\n");
if (!TopCand.isValid() || TopCand.SU->isScheduled ||
TopCand.Policy != TopPolicy) {
TopCand.reset(CandPolicy());
pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
assert(TopCand.Reason != NoCand && "failed to find the first candidate");
} else {
- DEBUG(traceCandidate(TopCand));
+ LLVM_DEBUG(traceCandidate(TopCand));
}
// Pick best from BotCand and TopCand.
- DEBUG(
- dbgs() << "Top Cand: ";
- traceCandidate(TopCand);
- dbgs() << "Bot Cand: ";
- traceCandidate(BotCand);
- );
+ LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
+ dbgs() << "Bot Cand: "; traceCandidate(BotCand););
SchedCandidate Cand;
if (TopCand.Reason == BotCand.Reason) {
Cand = BotCand;
@@ -256,10 +252,7 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
}
}
}
- DEBUG(
- dbgs() << "Picking: ";
- traceCandidate(Cand);
- );
+ LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
IsTopNode = Cand.AtTop;
return Cand.SU;
@@ -305,7 +298,8 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
if (SU->isBottomReady())
Bot.removeReady(SU);
- DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
+ LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
+ << *SU->getInstr());
return SU;
}
@@ -319,7 +313,7 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
MFI.getMaxWavesPerEU())),
MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) {
- DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
+ LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}
void GCNScheduleDAGMILive::schedule() {
@@ -339,12 +333,12 @@ void GCNScheduleDAGMILive::schedule() {
if (LIS) {
PressureBefore = Pressure[RegionIdx];
- DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
- GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
- dbgs() << "Region live-in pressure: ";
- llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
- dbgs() << "Region register pressure: ";
- PressureBefore.print(dbgs()));
+ LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
+ GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
+ dbgs() << "Region live-in pressure: ";
+ llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
+ dbgs() << "Region register pressure: ";
+ PressureBefore.print(dbgs()));
}
ScheduleDAGMILive::schedule();
@@ -357,12 +351,13 @@ void GCNScheduleDAGMILive::schedule() {
GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
auto PressureAfter = getRealRegPressure();
- DEBUG(dbgs() << "Pressure after scheduling: "; PressureAfter.print(dbgs()));
+ LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
+ PressureAfter.print(dbgs()));
if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit) {
Pressure[RegionIdx] = PressureAfter;
- DEBUG(dbgs() << "Pressure in desired limits, done.\n");
+ LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
return;
}
unsigned WavesAfter = getMaxWaves(PressureAfter.getSGPRNum(),
@@ -371,16 +366,16 @@ void GCNScheduleDAGMILive::schedule() {
PressureBefore.getVGPRNum(), MF);
WavesAfter = std::min(WavesAfter, MFI.getMaxWavesPerEU());
WavesBefore = std::min(WavesBefore, MFI.getMaxWavesPerEU());
- DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore <<
- ", after " << WavesAfter << ".\n");
+ LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
+ << ", after " << WavesAfter << ".\n");
// We could not keep current target occupancy because of the just scheduled
// region. Record new occupancy for next scheduling cycle.
unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
if (NewOccupancy < MinOccupancy) {
MinOccupancy = NewOccupancy;
- DEBUG(dbgs() << "Occupancy lowered for the function to "
- << MinOccupancy << ".\n");
+ LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
+ << MinOccupancy << ".\n");
}
if (WavesAfter >= WavesBefore) {
@@ -388,7 +383,7 @@ void GCNScheduleDAGMILive::schedule() {
return;
}
- DEBUG(dbgs() << "Attempting to revert scheduling.\n");
+ LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
RegionEnd = RegionBegin;
for (MachineInstr *MI : Unsched) {
if (MI->isDebugInstr())
@@ -418,7 +413,7 @@ void GCNScheduleDAGMILive::schedule() {
}
RegionEnd = MI->getIterator();
++RegionEnd;
- DEBUG(dbgs() << "Scheduling " << *MI);
+ LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
}
RegionBegin = Unsched.front()->getIterator();
Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
@@ -493,7 +488,7 @@ void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
void GCNScheduleDAGMILive::finalizeSchedule() {
GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
- DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
+ LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
LiveIns.resize(Regions.size());
Pressure.resize(Regions.size());
@@ -512,9 +507,10 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
if (!LIS || StartingOccupancy <= MinOccupancy)
break;
- DEBUG(dbgs()
- << "Retrying function scheduling with lowest recorded occupancy "
- << MinOccupancy << ".\n");
+ LLVM_DEBUG(
+ dbgs()
+ << "Retrying function scheduling with lowest recorded occupancy "
+ << MinOccupancy << ".\n");
S.setTargetOccupancy(MinOccupancy);
}
@@ -540,12 +536,13 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
continue;
}
- DEBUG(dbgs() << "********** MI Scheduling **********\n");
- DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
- << MBB->getName() << "\n From: " << *begin() << " To: ";
- if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
- else dbgs() << "End";
- dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
+ LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
+ LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
+ << MBB->getName() << "\n From: " << *begin()
+ << " To: ";
+ if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+ else dbgs() << "End";
+ dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
schedule();
diff --git a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
index 5e1ba6b506d..a6838875622 100644
--- a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
@@ -121,7 +121,7 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
LaterInstCount = getCFAluSize(LatrCFAlu);
unsigned CumuledInsts = RootInstCount + LaterInstCount;
if (CumuledInsts >= TII->getMaxAlusPerClause()) {
- DEBUG(dbgs() << "Excess inst counts\n");
+ LLVM_DEBUG(dbgs() << "Excess inst counts\n");
return false;
}
if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
@@ -139,7 +139,7 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
RootCFAlu.getOperand(KBank0Idx).getImm() ||
LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
- DEBUG(dbgs() << "Wrong KC0\n");
+ LLVM_DEBUG(dbgs() << "Wrong KC0\n");
return false;
}
// Is KCache Bank 1 compatible ?
@@ -155,7 +155,7 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
RootCFAlu.getOperand(KBank1Idx).getImm() ||
LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
- DEBUG(dbgs() << "Wrong KC0\n");
+ LLVM_DEBUG(dbgs() << "Wrong KC0\n");
return false;
}
if (LatrCFAlu.getOperand(Mode0Idx).getImm()) {
diff --git a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 0fbc254486d..b4ec8dfb343 100644
--- a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -531,7 +531,7 @@ public:
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E;) {
if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
- DEBUG(dbgs() << CfCount << ":"; I->dump(););
+ LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
FetchClauses.push_back(MakeFetchClause(MBB, I));
CfCount++;
LastAlu.back() = nullptr;
@@ -549,7 +549,8 @@ public:
switch (MI->getOpcode()) {
case AMDGPU::CF_ALU_PUSH_BEFORE:
if (RequiresWorkAround) {
- DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
+ LLVM_DEBUG(dbgs()
+ << "Applying bug work-around for ALU_PUSH_BEFORE\n");
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
.addImm(CfCount + 1)
.addImm(1);
@@ -562,7 +563,7 @@ public:
case AMDGPU::CF_ALU:
I = MI;
AluClauses.push_back(MakeALUClause(MBB, I));
- DEBUG(dbgs() << CfCount << ":"; MI->dump(););
+ LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
break;
case AMDGPU::WHILELOOP: {
@@ -597,7 +598,7 @@ public:
.addImm(0)
.addImm(0);
IfThenElseStack.push_back(MIb);
- DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
MI->eraseFromParent();
CfCount++;
break;
@@ -610,7 +611,7 @@ public:
getHWInstrDesc(CF_ELSE))
.addImm(0)
.addImm(0);
- DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
IfThenElseStack.push_back(MIb);
MI->eraseFromParent();
CfCount++;
@@ -626,7 +627,7 @@ public:
.addImm(CfCount + 1)
.addImm(1);
(void)MIb;
- DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
CfCount++;
}
@@ -673,7 +674,7 @@ public:
}
default:
if (TII->isExport(MI->getOpcode())) {
- DEBUG(dbgs() << CfCount << ":"; MI->dump(););
+ LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
}
break;
diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
index f8d062ef52d..4bb4c037a44 100644
--- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -78,7 +78,7 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
AllowSwitchFromAlu = true;
} else {
unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
- DEBUG( dbgs() << NeededWF << " approx. Wavefronts Required\n" );
+ LLVM_DEBUG(dbgs() << NeededWF << " approx. Wavefronts Required\n");
// We assume the local GPR requirements to be "dominated" by the requirement
// of the TEX clause (which consumes 128 bits regs) ; ALU inst before and
// after TEX are indeed likely to consume or generate values from/for the
@@ -124,26 +124,24 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
NextInstKind = IDOther;
}
- DEBUG(
- if (SU) {
- dbgs() << " ** Pick node **\n";
- SU->dump(DAG);
- } else {
- dbgs() << "NO NODE \n";
- for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
- const SUnit &S = DAG->SUnits[i];
- if (!S.isScheduled)
- S.dump(DAG);
- }
- }
- );
+ LLVM_DEBUG(if (SU) {
+ dbgs() << " ** Pick node **\n";
+ SU->dump(DAG);
+ } else {
+ dbgs() << "NO NODE \n";
+ for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
+ const SUnit &S = DAG->SUnits[i];
+ if (!S.isScheduled)
+ S.dump(DAG);
+ }
+ });
return SU;
}
void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
if (NextInstKind != CurInstKind) {
- DEBUG(dbgs() << "Instruction Type Switch\n");
+ LLVM_DEBUG(dbgs() << "Instruction Type Switch\n");
if (NextInstKind != IDAlu)
OccupedSlotsMask |= 31;
CurEmitted = 0;
@@ -172,8 +170,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
++CurEmitted;
}
-
- DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
+ LLVM_DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
if (CurInstKind != IDFetch) {
MoveUnits(Pending[IDFetch], Available[IDFetch]);
@@ -190,11 +187,11 @@ isPhysicalRegCopy(MachineInstr *MI) {
}
void R600SchedStrategy::releaseTopNode(SUnit *SU) {
- DEBUG(dbgs() << "Top Releasing ";SU->dump(DAG););
+ LLVM_DEBUG(dbgs() << "Top Releasing "; SU->dump(DAG););
}
void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
- DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG););
+ LLVM_DEBUG(dbgs() << "Bottom Releasing "; SU->dump(DAG););
if (isPhysicalRegCopy(SU->getInstr())) {
PhysicalRegCopy.push_back(SU);
return;
@@ -345,7 +342,7 @@ void R600SchedStrategy::LoadAlu() {
}
void R600SchedStrategy::PrepareNextSlot() {
- DEBUG(dbgs() << "New Slot\n");
+ LLVM_DEBUG(dbgs() << "New Slot\n");
assert (OccupedSlotsMask && "Slot wasn't filled");
OccupedSlotsMask = 0;
// if (HwGen == R600Subtarget::NORTHERN_ISLANDS)
diff --git a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index 4a14d95f1cc..cb46855f475 100644
--- a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -228,20 +228,20 @@ MachineInstr *R600VectorRegMerger::RebuildVector(
UpdatedUndef.erase(ChanPos);
assert(!is_contained(UpdatedUndef, Chan) &&
"UpdatedUndef shouldn't contain Chan more than once!");
- DEBUG(dbgs() << " ->"; Tmp->dump(););
+ LLVM_DEBUG(dbgs() << " ->"; Tmp->dump(););
(void)Tmp;
SrcVec = DstReg;
}
MachineInstr *NewMI =
BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg).addReg(SrcVec);
- DEBUG(dbgs() << " ->"; NewMI->dump(););
+ LLVM_DEBUG(dbgs() << " ->"; NewMI->dump(););
- DEBUG(dbgs() << " Updating Swizzle:\n");
+ LLVM_DEBUG(dbgs() << " Updating Swizzle:\n");
for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
E = MRI->use_instr_end(); It != E; ++It) {
- DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->");
+ LLVM_DEBUG(dbgs() << " "; (*It).dump(); dbgs() << " ->");
SwizzleInput(*It, RemapChan);
- DEBUG((*It).dump());
+ LLVM_DEBUG((*It).dump());
}
RSI->Instr->eraseFromParent();
@@ -372,14 +372,14 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
if (!areAllUsesSwizzeable(Reg))
continue;
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Trying to optimize ";
MI.dump();
});
RegSeqInfo CandidateRSI;
std::vector<std::pair<unsigned, unsigned>> RemapChan;
- DEBUG(dbgs() << "Using common slots...\n";);
+ LLVM_DEBUG(dbgs() << "Using common slots...\n";);
if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) {
// Remove CandidateRSI mapping
RemoveMI(CandidateRSI.Instr);
@@ -387,7 +387,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
trackRSI(RSI);
continue;
}
- DEBUG(dbgs() << "Using free slots...\n";);
+ LLVM_DEBUG(dbgs() << "Using free slots...\n";);
RemapChan.clear();
if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) {
RemoveMI(CandidateRSI.Instr);
diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
index 7340318d2d8..069e9dcb123 100644
--- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -236,7 +236,7 @@ public:
if (ConsideredInstUsesAlreadyWrittenVectorElement &&
!TII->isVectorOnly(MI) && VLIW5) {
isTransSlot = true;
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Considering as Trans Inst :";
MI.dump();
});
@@ -249,7 +249,7 @@ public:
// Are the Constants limitations met ?
CurrentPacketMIs.push_back(&MI);
if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Couldn't pack :\n";
MI.dump();
dbgs() << "with the following packets :\n";
@@ -266,7 +266,7 @@ public:
// Is there a BankSwizzle set that meet Read Port limitations ?
if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
PV, BS, isTransSlot)) {
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Couldn't pack :\n";
MI.dump();
dbgs() << "with the following packets :\n";
diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 11fea5d6ee7..8ef4315e67f 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -201,7 +201,7 @@ bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
// Erase "Phi" if it is not used any more
void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
if (RecursivelyDeleteDeadPHINode(Phi)) {
- DEBUG(dbgs() << "Erased unused condition phi\n");
+ LLVM_DEBUG(dbgs() << "Erased unused condition phi\n");
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index e26bc99bd4b..033d08a42b6 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -513,9 +513,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
if (MDT.dominates(MI1, MI2)) {
if (!intereferes(MI2, MI1)) {
- DEBUG(dbgs() << "Erasing from "
- << printMBBReference(*MI2->getParent()) << " "
- << *MI2);
+ LLVM_DEBUG(dbgs()
+ << "Erasing from "
+ << printMBBReference(*MI2->getParent()) << " " << *MI2);
MI2->eraseFromParent();
Defs.erase(I2++);
Changed = true;
@@ -523,9 +523,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
}
} else if (MDT.dominates(MI2, MI1)) {
if (!intereferes(MI1, MI2)) {
- DEBUG(dbgs() << "Erasing from "
- << printMBBReference(*MI1->getParent()) << " "
- << *MI1);
+ LLVM_DEBUG(dbgs()
+ << "Erasing from "
+ << printMBBReference(*MI1->getParent()) << " " << *MI1);
MI1->eraseFromParent();
Defs.erase(I1++);
Changed = true;
@@ -541,11 +541,12 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
- DEBUG(dbgs() << "Erasing from "
- << printMBBReference(*MI1->getParent()) << " " << *MI1
- << "and moving from "
- << printMBBReference(*MI2->getParent()) << " to "
- << printMBBReference(*I->getParent()) << " " << *MI2);
+ LLVM_DEBUG(dbgs()
+ << "Erasing from "
+ << printMBBReference(*MI1->getParent()) << " " << *MI1
+ << "and moving from "
+ << printMBBReference(*MI2->getParent()) << " to "
+ << printMBBReference(*I->getParent()) << " " << *MI2);
I->getParent()->splice(I, MI2->getParent(), MI2);
MI1->eraseFromParent();
Defs.erase(I1++);
@@ -633,7 +634,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
if (!predsHasDivergentTerminator(MBB0, TRI) &&
!predsHasDivergentTerminator(MBB1, TRI)) {
- DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n');
+ LLVM_DEBUG(dbgs()
+ << "Not fixing PHI for uniform branch: " << MI << '\n');
break;
}
}
@@ -673,7 +675,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
SmallSet<unsigned, 8> Visited;
if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
- DEBUG(dbgs() << "Fixing PHI: " << MI);
+ LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
TII->moveToVALU(MI);
}
break;
@@ -685,7 +687,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
+ LLVM_DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
TII->moveToVALU(MI);
break;
@@ -696,7 +698,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
if (TRI->isSGPRClass(DstRC) &&
(TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
- DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
+ LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
TII->moveToVALU(MI);
}
break;
diff --git a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
index 7a3caf4db71..dec88084708 100644
--- a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
@@ -58,7 +58,7 @@ bool SIFixVGPRCopies::runOnMachineFunction(MachineFunction &MF) {
if (TII->isVGPRCopy(MI) && !MI.readsRegister(AMDGPU::EXEC, TRI)) {
MI.addOperand(MF,
MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
- DEBUG(dbgs() << "Add exec use to " << MI);
+ LLVM_DEBUG(dbgs() << "Add exec use to " << MI);
Changed = true;
}
break;
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index e4f121368a4..d41d151492d 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -637,14 +637,14 @@ static bool tryFoldInst(const SIInstrInfo *TII,
const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
if (Src1->isIdenticalTo(*Src0)) {
- DEBUG(dbgs() << "Folded " << *MI << " into ");
+ LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
if (Src2Idx != -1)
MI->RemoveOperand(Src2Idx);
MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
: getMovOpc(false)));
- DEBUG(dbgs() << *MI << '\n');
+ LLVM_DEBUG(dbgs() << *MI << '\n');
return true;
}
}
@@ -685,7 +685,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
// be folded due to multiple uses or operand constraints.
if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) {
- DEBUG(dbgs() << "Constant folded " << *UseMI <<'\n');
+ LLVM_DEBUG(dbgs() << "Constant folded " << *UseMI << '\n');
// Some constant folding cases change the same immediate's use to a new
// instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user
@@ -752,8 +752,9 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
// copies.
MRI->clearKillFlags(Fold.OpToFold->getReg());
}
- DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
- static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
+ LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
+ << static_cast<int>(Fold.UseOpNo) << " of "
+ << *Fold.UseMI << '\n');
tryFoldInst(TII, Fold.UseMI);
} else if (Fold.isCommuted()) {
// Restoring instruction's original operand order if fold has failed.
@@ -833,7 +834,8 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
if (!DefClamp)
return false;
- DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def << '\n');
+ LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def
+ << '\n');
// Clamp is applied after omod, so it is OK if omod is set.
DefClamp->setImm(1);
@@ -956,7 +958,7 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
return false;
- DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n');
+ LLVM_DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n');
DefOMod->setImm(OMod);
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index e6aaaf94751..0e1c2bc3172 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -350,9 +350,7 @@ public:
void setWaitcnt(MachineInstr *WaitcntIn) { LfWaitcnt = WaitcntIn; }
MachineInstr *getWaitcnt() const { return LfWaitcnt; }
- void print() {
- DEBUG(dbgs() << " iteration " << IterCnt << '\n';);
- }
+ void print() { LLVM_DEBUG(dbgs() << " iteration " << IterCnt << '\n';); }
private:
  // s_waitcnt added at the end of loop footer to stabilize wait scores
@@ -515,7 +513,7 @@ void BlockWaitcntBrackets::setExpScore(const MachineInstr *MI,
const MachineRegisterInfo *MRI,
unsigned OpNo, int32_t Val) {
RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo, false);
- DEBUG({
+ LLVM_DEBUG({
const MachineOperand &Opnd = MI->getOperand(OpNo);
assert(TRI->isVGPR(*MRI, Opnd.getReg()));
});
@@ -1206,8 +1204,9 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
}
ScoreBracket->setRevisitLoop(true);
- DEBUG(dbgs() << "set-revisit: Block"
- << ContainingLoop->getHeader()->getNumber() << '\n';);
+ LLVM_DEBUG(dbgs()
+ << "set-revisit: Block"
+ << ContainingLoop->getHeader()->getNumber() << '\n';);
}
}
@@ -1242,26 +1241,29 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(
if (insertSWaitInst) {
if (OldWaitcnt && OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT) {
if (ForceEmitZeroWaitcnts)
- DEBUG(dbgs() << "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
+ LLVM_DEBUG(
+ dbgs()
+ << "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
if (IsForceEmitWaitcnt)
- DEBUG(dbgs() << "Force emit a s_waitcnt due to debug counter\n");
+ LLVM_DEBUG(dbgs()
+ << "Force emit a s_waitcnt due to debug counter\n");
OldWaitcnt->getOperand(0).setImm(Enc);
if (!OldWaitcnt->getParent())
MI.getParent()->insert(MI, OldWaitcnt);
- DEBUG(dbgs() << "updateWaitcntInBlock\n"
- << "Old Instr: " << MI << '\n'
- << "New Instr: " << *OldWaitcnt << '\n');
+ LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
+ << "Old Instr: " << MI << '\n'
+ << "New Instr: " << *OldWaitcnt << '\n');
} else {
auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
.addImm(Enc);
TrackedWaitcntSet.insert(SWaitInst);
- DEBUG(dbgs() << "insertWaitcntInBlock\n"
- << "Old Instr: " << MI << '\n'
- << "New Instr: " << *SWaitInst << '\n');
+ LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
+ << "Old Instr: " << MI << '\n'
+ << "New Instr: " << *SWaitInst << '\n');
}
}
@@ -1670,7 +1672,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "*** Block" << Block.getNumber() << " ***";
ScoreBrackets->dump();
});
@@ -1731,7 +1733,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
ScoreBrackets->clearWaitcnt();
- DEBUG({
+ LLVM_DEBUG({
Inst.print(dbgs());
ScoreBrackets->dump();
});
@@ -1771,7 +1773,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
if (ContainingLoop && isLoopBottom(ContainingLoop, &Block)) {
LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
WaitcntData->print();
- DEBUG(dbgs() << '\n';);
+ LLVM_DEBUG(dbgs() << '\n';);
// The iterative waitcnt insertion algorithm aims for optimal waitcnt
// placement and doesn't always guarantee convergence for a loop. Each
@@ -1811,7 +1813,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
}
if (SWaitInst) {
- DEBUG({
+ LLVM_DEBUG({
SWaitInst->print(dbgs());
dbgs() << "\nAdjusted score board:";
ScoreBrackets->dump();
@@ -1896,8 +1898,8 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
if ((std::count(BlockWaitcntProcessedSet.begin(),
BlockWaitcntProcessedSet.end(), &MBB) < Count)) {
BlockWaitcntBracketsMap[&MBB]->setRevisitLoop(true);
- DEBUG(dbgs() << "set-revisit: Block"
- << ContainingLoop->getHeader()->getNumber() << '\n';);
+ LLVM_DEBUG(dbgs() << "set-revisit: Block"
+ << ContainingLoop->getHeader()->getNumber() << '\n';);
}
}
@@ -1931,7 +1933,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
}
LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
WaitcntData->incIterCnt();
- DEBUG(dbgs() << "revisit: Block" << EntryBB->getNumber() << '\n';);
+ LLVM_DEBUG(dbgs() << "revisit: Block" << EntryBB->getNumber() << '\n';);
continue;
} else {
LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index e5304818afb..1ebc45dc9ee 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -553,7 +553,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
CI.I->eraseFromParent();
CI.Paired->eraseFromParent();
- DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
+ LLVM_DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
return Next;
}
@@ -631,7 +631,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
CI.I->eraseFromParent();
CI.Paired->eraseFromParent();
- DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
+ LLVM_DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
return Next;
}
@@ -950,7 +950,7 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
assert(MRI->isSSA() && "Must be run on SSA");
- DEBUG(dbgs() << "Running SILoadStoreOptimizer\n");
+ LLVM_DEBUG(dbgs() << "Running SILoadStoreOptimizer\n");
bool Modified = false;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 0fd4c6bfed9..86f81136bc5 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -1207,7 +1207,7 @@ void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVaria
NextReservedID = 1;
NextNonReservedID = DAGSize + 1;
- DEBUG(dbgs() << "Coloring the graph\n");
+ LLVM_DEBUG(dbgs() << "Coloring the graph\n");
if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
colorHighLatenciesGroups();
@@ -1264,13 +1264,11 @@ void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVaria
SIScheduleBlock *Block = CurrentBlocks[i];
Block->finalizeUnits();
}
- DEBUG(
- dbgs() << "Blocks created:\n\n";
- for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
- SIScheduleBlock *Block = CurrentBlocks[i];
- Block->printDebug(true);
- }
- );
+ LLVM_DEBUG(dbgs() << "Blocks created:\n\n";
+ for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ Block->printDebug(true);
+ });
}
// Two functions taken from Codegen/MachineScheduler.cpp
@@ -1290,7 +1288,7 @@ void SIScheduleBlockCreator::topologicalSort() {
unsigned DAGSize = CurrentBlocks.size();
std::vector<int> WorkList;
- DEBUG(dbgs() << "Topological Sort\n");
+ LLVM_DEBUG(dbgs() << "Topological Sort\n");
WorkList.reserve(DAGSize);
TopDownIndex2Block.resize(DAGSize);
@@ -1337,11 +1335,11 @@ void SIScheduleBlockCreator::topologicalSort() {
void SIScheduleBlockCreator::scheduleInsideBlocks() {
unsigned DAGSize = CurrentBlocks.size();
- DEBUG(dbgs() << "\nScheduling Blocks\n\n");
+ LLVM_DEBUG(dbgs() << "\nScheduling Blocks\n\n");
// We do schedule a valid scheduling such that a Block corresponds
// to a range of instructions.
- DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n");
+ LLVM_DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n");
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
SIScheduleBlock *Block = CurrentBlocks[i];
Block->fastSchedule();
@@ -1395,7 +1393,7 @@ void SIScheduleBlockCreator::scheduleInsideBlocks() {
Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
}
- DEBUG(dbgs() << "Restoring MI Pos\n");
+ LLVM_DEBUG(dbgs() << "Restoring MI Pos\n");
// Restore old ordering (which prevents a LIS->handleMove bug).
for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
MachineBasicBlock::iterator POld = PosOld[i-1];
@@ -1409,12 +1407,10 @@ void SIScheduleBlockCreator::scheduleInsideBlocks() {
}
}
- DEBUG(
- for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
- SIScheduleBlock *Block = CurrentBlocks[i];
- Block->printDebug(true);
- }
- );
+ LLVM_DEBUG(for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ Block->printDebug(true);
+ });
}
void SIScheduleBlockCreator::fillStats() {
@@ -1565,13 +1561,10 @@ SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
blockScheduled(Block);
}
- DEBUG(
- dbgs() << "Block Order:";
- for (SIScheduleBlock* Block : BlocksScheduled) {
- dbgs() << ' ' << Block->getID();
- }
- dbgs() << '\n';
- );
+ LLVM_DEBUG(dbgs() << "Block Order:"; for (SIScheduleBlock *Block
+ : BlocksScheduled) {
+ dbgs() << ' ' << Block->getID();
+ } dbgs() << '\n';);
}
bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
@@ -1634,18 +1627,17 @@ SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
maxVregUsage = VregCurrentUsage;
if (SregCurrentUsage > maxSregUsage)
maxSregUsage = SregCurrentUsage;
- DEBUG(
- dbgs() << "Picking New Blocks\n";
- dbgs() << "Available: ";
- for (SIScheduleBlock* Block : ReadyBlocks)
- dbgs() << Block->getID() << ' ';
- dbgs() << "\nCurrent Live:\n";
- for (unsigned Reg : LiveRegs)
- dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
- dbgs() << '\n';
- dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';
- dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';
- );
+ LLVM_DEBUG(dbgs() << "Picking New Blocks\n"; dbgs() << "Available: ";
+ for (SIScheduleBlock *Block
+ : ReadyBlocks) dbgs()
+ << Block->getID() << ' ';
+ dbgs() << "\nCurrent Live:\n";
+ for (unsigned Reg
+ : LiveRegs) dbgs()
+ << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
+ dbgs() << '\n';
+ dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';
+ dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';);
Cand.Block = nullptr;
for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
@@ -1677,20 +1669,18 @@ SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
if (TryCand.Reason != NoCand) {
Cand.setBest(TryCand);
Best = I;
- DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' '
- << getReasonStr(Cand.Reason) << '\n');
+ LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' '
+ << getReasonStr(Cand.Reason) << '\n');
}
}
- DEBUG(
- dbgs() << "Picking: " << Cand.Block->getID() << '\n';
- dbgs() << "Is a block with high latency instruction: "
- << (Cand.IsHighLatency ? "yes\n" : "no\n");
- dbgs() << "Position of last high latency dependency: "
- << Cand.LastPosHighLatParentScheduled << '\n';
- dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';
- dbgs() << '\n';
- );
+ LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n';
+ dbgs() << "Is a block with high latency instruction: "
+ << (Cand.IsHighLatency ? "yes\n" : "no\n");
+ dbgs() << "Position of last high latency dependency: "
+ << Cand.LastPosHighLatParentScheduled << '\n';
+ dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';
+ dbgs() << '\n';);
Block = Cand.Block;
ReadyBlocks.erase(Best);
@@ -1939,13 +1929,10 @@ void SIScheduleDAGMI::schedule()
{
SmallVector<SUnit*, 8> TopRoots, BotRoots;
SIScheduleBlockResult Best, Temp;
- DEBUG(dbgs() << "Preparing Scheduling\n");
+ LLVM_DEBUG(dbgs() << "Preparing Scheduling\n");
buildDAGWithRegPressure();
- DEBUG(
- for(SUnit& SU : SUnits)
- SU.dumpAll(this)
- );
+ LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this));
topologicalSort();
findRootsAndBiasEdges(TopRoots, BotRoots);
@@ -2047,15 +2034,15 @@ void SIScheduleDAGMI::schedule()
scheduleMI(SU, true);
- DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
- << *SU->getInstr());
+ LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
+ << *SU->getInstr());
}
assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
placeDebugValues();
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) << " ***\n";
dumpSchedule();
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index ddf45bbccb9..b68df539771 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -243,11 +243,11 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
// Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec
if (CopyToExecInst->getOperand(1).isKill() &&
isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
- DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);
+ LLVM_DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);
PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
- DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');
+ LLVM_DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');
CopyToExecInst->eraseFromParent();
}
@@ -257,7 +257,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
if (isLiveOut(MBB, CopyToExec)) {
// The copied register is live out and has a second use in another block.
- DEBUG(dbgs() << "Exec copy source register is live out\n");
+ LLVM_DEBUG(dbgs() << "Exec copy source register is live out\n");
continue;
}
@@ -269,7 +269,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
= std::next(CopyFromExecInst->getIterator()), JE = I->getIterator();
J != JE; ++J) {
if (SaveExecInst && J->readsRegister(AMDGPU::EXEC, TRI)) {
- DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
+ LLVM_DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
// Make sure this is inserted after any VALU ops that may have been
// scheduled in between.
SaveExecInst = nullptr;
@@ -280,8 +280,8 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
if (J->modifiesRegister(CopyToExec, TRI)) {
if (SaveExecInst) {
- DEBUG(dbgs() << "Multiple instructions modify "
- << printReg(CopyToExec, TRI) << '\n');
+ LLVM_DEBUG(dbgs() << "Multiple instructions modify "
+ << printReg(CopyToExec, TRI) << '\n');
SaveExecInst = nullptr;
break;
}
@@ -292,10 +292,11 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
if (ReadsCopyFromExec) {
SaveExecInst = &*J;
- DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
+ LLVM_DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
continue;
} else {
- DEBUG(dbgs() << "Instruction does not read exec copy: " << *J << '\n');
+ LLVM_DEBUG(dbgs()
+ << "Instruction does not read exec copy: " << *J << '\n');
break;
}
} else if (ReadsCopyFromExec && !SaveExecInst) {
@@ -307,8 +308,8 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
// spill %sgpr0_sgpr1
// %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1
//
- DEBUG(dbgs() << "Found second use of save inst candidate: "
- << *J << '\n');
+ LLVM_DEBUG(dbgs() << "Found second use of save inst candidate: " << *J
+ << '\n');
break;
}
@@ -321,7 +322,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
if (!SaveExecInst)
continue;
- DEBUG(dbgs() << "Insert save exec op: " << *SaveExecInst << '\n');
+ LLVM_DEBUG(dbgs() << "Insert save exec op: " << *SaveExecInst << '\n');
MachineOperand &Src0 = SaveExecInst->getOperand(1);
MachineOperand &Src1 = SaveExecInst->getOperand(2);
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index b7bb80acb71..c9e3e5696f3 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -143,7 +143,8 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
I->hasUnmodeledSideEffects() || I->hasOrderedMemoryRef())
break;
- DEBUG(dbgs() << "Removing no effect instruction: " << *I << '\n');
+ LLVM_DEBUG(dbgs()
+ << "Removing no effect instruction: " << *I << '\n');
for (auto &Op : I->operands()) {
if (Op.isReg())
@@ -193,7 +194,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
!getOrExecSource(*NextLead, *TII, MRI))
continue;
- DEBUG(dbgs() << "Redundant EXEC = S_OR_B64 found: " << *Lead << '\n');
+ LLVM_DEBUG(dbgs() << "Redundant EXEC = S_OR_B64 found: " << *Lead << '\n');
auto SaveExec = getOrExecSource(*Lead, *TII, MRI);
unsigned SaveExecReg = getOrNonExecReg(*Lead, *TII);
@@ -224,7 +225,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
break;
}
- DEBUG(dbgs() << "Redundant EXEC COPY: " << *SaveExec << '\n');
+ LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *SaveExec << '\n');
}
if (SafeToReplace) {
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 7a56b4d1bd6..6f9d7522872 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -846,7 +846,7 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
for (MachineInstr &MI : MBB) {
if (auto Operand = matchSDWAOperand(MI)) {
- DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n');
+ LLVM_DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n');
SDWAOperands[&MI] = std::move(Operand);
++NumSDWAPatternsFound;
}
@@ -901,7 +901,7 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
const SDWAOperandsVector &SDWAOperands) {
- DEBUG(dbgs() << "Convert instruction:" << MI);
+ LLVM_DEBUG(dbgs() << "Convert instruction:" << MI);
// Convert to sdwa
int SDWAOpcode;
@@ -1050,7 +1050,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
// Apply all sdwa operand patterns.
bool Converted = false;
for (auto &Operand : SDWAOperands) {
- DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand);
+ LLVM_DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand);
// There should be no intersection between SDWA operands and potential MIs
// e.g.:
// v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0
@@ -1071,7 +1071,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
return false;
}
- DEBUG(dbgs() << "\nInto:" << *SDWAInst << '\n');
+ LLVM_DEBUG(dbgs() << "\nInto:" << *SDWAInst << '\n');
++NumSDWAInstructionsPeepholed;
MI.eraseFromParent();
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 33fd5a30791..3c4c3baf799 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -495,7 +495,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
// We can shrink this instruction
- DEBUG(dbgs() << "Shrinking " << MI);
+ LLVM_DEBUG(dbgs() << "Shrinking " << MI);
MachineInstrBuilder Inst32 =
BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
@@ -539,9 +539,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MI.eraseFromParent();
foldImmediates(*Inst32, TII, MRI);
- DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
-
-
+ LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
}
}
return false;
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 89e5d56d1e0..7de132e0ed1 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -679,7 +679,8 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
if (!isEntry && BI.Needs == StateWQM && BI.OutNeeds != StateExact)
return;
- DEBUG(dbgs() << "\nProcessing block " << printMBBReference(MBB) << ":\n");
+ LLVM_DEBUG(dbgs() << "\nProcessing block " << printMBBReference(MBB)
+ << ":\n");
unsigned SavedWQMReg = 0;
unsigned SavedNonWWMReg = 0;
@@ -882,7 +883,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
}
}
- DEBUG(printInfo());
+ LLVM_DEBUG(printInfo());
lowerCopyInstrs();
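
For context, a minimal sketch of the LLVM_DEBUG pattern these hunks migrate to, using a hypothetical DEBUG_TYPE of "si-example" that is not a name taken from this commit:

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "si-example"

using namespace llvm;

static void noteFoldedOperand(unsigned OpNo) {
  // Expands to nothing in NDEBUG builds; otherwise prints only when the
  // tool runs under -debug or -debug-only=si-example.
  LLVM_DEBUG(dbgs() << "folded operand " << OpNo << '\n');
}

The macro splices its argument into a guarded block as a statement, so the trailing semicolons left inside a few of the converted calls (for example << '\n';);) are harmless.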