Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp | 12
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp | 6
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp | 26
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 16
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600InstrInfo.cpp | 222
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600InstrInfo.h | 33
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp | 9
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600Packetizer.cpp | 20
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 8
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 850
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 93
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp | 14
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 4 |
17 files changed, 656 insertions, 665 deletions
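The diff below applies one mechanical pattern across the AMDGPU backend: TargetInstrInfo hooks such as verifyInstruction, getSrcs, setImmOperand, addFlag, expandPostRAPseudo, commuteInstruction, findCommutedOpIndices and getMemOpBaseRegImmOfs change their MachineInstr parameters from pointers to references, and pointer-holding call sites switch from passing MI to passing *MI. The following minimal sketch only illustrates that pointer-to-reference shape; it assumes nothing about LLVM itself, and Instr, OldInstrInfo and NewInstrInfo are hypothetical stand-ins, not LLVM classes.

#include <cassert>
#include <iostream>

// Hypothetical stand-in for an instruction; not LLVM's MachineInstr.
struct Instr {
  unsigned Opcode;
};

struct OldInstrInfo {
  // Old-style hook: takes a pointer, so the implementation guards against
  // null and callers pass the pointer directly.
  bool isTransOnly(const Instr *MI) const { return MI && MI->Opcode == 1; }
};

struct NewInstrInfo {
  // New-style hook: takes a reference, so "the instruction exists" becomes a
  // precondition of the signature and the body needs no null check.
  bool isTransOnly(const Instr &MI) const { return MI.Opcode == 1; }
};

int main() {
  Instr MI{1};
  OldInstrInfo OldTII;
  NewInstrInfo NewTII;
  assert(OldTII.isTransOnly(&MI)); // before: TII->isTransOnly(MI) with MI a pointer
  assert(NewTII.isTransOnly(MI));  // after:  TII->isTransOnly(*MI) at pointer call sites
  std::cout << "pointer and reference hooks agree\n";
}

Encoding the non-null precondition in the signature is also what lets the patch drop guards such as the assert(MIa && ...) checks in areMemAccessesTriviallyDisjoint further down in SIInstrInfo.cpp.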
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 9453fb06bb5..77f67b492f4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -92,7 +92,7 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { AMDGPUMCInstLower MCInstLowering(OutContext, STI); StringRef Err; - if (!STI.getInstrInfo()->verifyInstruction(MI, Err)) { + if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) { LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext(); C.emitError("Illegal instruction detected: " + Err); MI->dump(); diff --git a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp index ef645f908b3..85ebae1d6ac 100644 --- a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp +++ b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp @@ -181,7 +181,7 @@ bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock::iterator LatestCFAlu = E; while (I != E) { MachineInstr *MI = I++; - if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) || + if ((!TII->canBeConsideredALU(*MI) && !isCFAlu(MI)) || TII->mustBeLastInClause(MI->getOpcode())) LatestCFAlu = E; if (!isCFAlu(MI)) diff --git a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index fd2a688852d..75a35843cc7 100644 --- a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -318,15 +318,15 @@ private: MachineBasicBlock::iterator ClauseHead = I; std::vector<MachineInstr *> ClauseContent; unsigned AluInstCount = 0; - bool IsTex = TII->usesTextureCache(ClauseHead); + bool IsTex = TII->usesTextureCache(*ClauseHead); std::set<unsigned> DstRegs; for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { if (IsTrivialInst(I)) continue; if (AluInstCount >= MaxFetchInst) break; - if ((IsTex && !TII->usesTextureCache(I)) || - (!IsTex && !TII->usesVertexCache(I))) + if ((IsTex && !TII->usesTextureCache(*I)) || + (!IsTex && !TII->usesVertexCache(*I))) break; if (!isCompatibleWithClause(I, DstRegs)) break; @@ -347,8 +347,8 @@ private: AMDGPU::ALU_LITERAL_Z, AMDGPU::ALU_LITERAL_W }; - const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs = - TII->getSrcs(MI); + const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = + TII->getSrcs(*MI); for (const auto &Src:Srcs) { if (Src.first->getReg() != AMDGPU::ALU_LITERAL_X) continue; @@ -516,7 +516,7 @@ public: for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { - if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) { + if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) { DEBUG(dbgs() << CfCount << ":"; I->dump();); FetchClauses.push_back(MakeFetchClause(MBB, I)); CfCount++; diff --git a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp index a2d5f5be280..a8a6c2f1e28 100644 --- a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -122,8 +122,8 @@ private: if (!TII->isALUInstr(MI->getOpcode()) && MI->getOpcode() != AMDGPU::DOT_4) return true; - const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Consts = - TII->getSrcs(MI); + const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts = + TII->getSrcs(*MI); assert((TII->isALUInstr(MI->getOpcode()) || MI->getOpcode() == AMDGPU::DOT_4) && "Can't assign Const"); for (unsigned i = 0, n = 
Consts.size(); i < n; ++i) { @@ -245,7 +245,7 @@ private: // clause as predicated alus). if (AluInstCount > 0) break; - if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH) + if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH) PushBeforeModifier = true; AluInstCount ++; continue; diff --git a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index 81219aec3ce..0385b6283f3 100644 --- a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -60,7 +60,7 @@ void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI, int OpIdx = TII->getOperandIdx(*OldMI, Op); if (OpIdx > -1) { uint64_t Val = OldMI->getOperand(OpIdx).getImm(); - TII->setImmOperand(NewMI, Op, Val); + TII->setImmOperand(*NewMI, Op, Val); } } @@ -107,11 +107,11 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { MI.getOperand(0).getReg(), // dst MI.getOperand(1).getReg(), // src0 AMDGPU::ZERO); // src1 - TII->addFlag(PredSet, 0, MO_FLAG_MASK); + TII->addFlag(*PredSet, 0, MO_FLAG_MASK); if (Flags & MO_FLAG_PUSH) { - TII->setImmOperand(PredSet, AMDGPU::OpName::update_exec_mask, 1); + TII->setImmOperand(*PredSet, AMDGPU::OpName::update_exec_mask, 1); } else { - TII->setImmOperand(PredSet, AMDGPU::OpName::update_pred, 1); + TII->setImmOperand(*PredSet, AMDGPU::OpName::update_pred, 1); } MI.eraseFromParent(); continue; @@ -137,9 +137,9 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { BMI->bundleWithPred(); } if (Chan >= 2) - TII->addFlag(BMI, 0, MO_FLAG_MASK); + TII->addFlag(*BMI, 0, MO_FLAG_MASK); if (Chan != 3) - TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); + TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST); } MI.eraseFromParent(); @@ -166,9 +166,9 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { BMI->bundleWithPred(); } if (Chan < 2) - TII->addFlag(BMI, 0, MO_FLAG_MASK); + TII->addFlag(*BMI, 0, MO_FLAG_MASK); if (Chan != 3) - TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); + TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST); } MI.eraseFromParent(); @@ -189,7 +189,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { BMI->bundleWithPred(); } if (Chan != 3) - TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); + TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST); } MI.eraseFromParent(); @@ -212,10 +212,10 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { BMI->bundleWithPred(); } if (Mask) { - TII->addFlag(BMI, 0, MO_FLAG_MASK); + TII->addFlag(*BMI, 0, MO_FLAG_MASK); } if (Chan != 3) - TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST); + TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST); unsigned Opcode = BMI->getOpcode(); // While not strictly necessary from hw point of view, we force // all src operands of a dot4 inst to belong to the same slot. 
@@ -330,10 +330,10 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { if (Chan != 0) NewMI->bundleWithPred(); if (Mask) { - TII->addFlag(NewMI, 0, MO_FLAG_MASK); + TII->addFlag(*NewMI, 0, MO_FLAG_MASK); } if (NotLast) { - TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); + TII->addFlag(*NewMI, 0, MO_FLAG_NOT_LAST); } SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::clamp); SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::literal); diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index f6e40ca3670..4851a02e775 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -242,7 +242,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( AMDGPU::MOV, MI->getOperand(0).getReg(), MI->getOperand(1).getReg()); - TII->addFlag(NewMI, 0, MO_FLAG_CLAMP); + TII->addFlag(*NewMI, 0, MO_FLAG_CLAMP); break; } @@ -251,7 +251,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( AMDGPU::MOV, MI->getOperand(0).getReg(), MI->getOperand(1).getReg()); - TII->addFlag(NewMI, 0, MO_FLAG_ABS); + TII->addFlag(*NewMI, 0, MO_FLAG_ABS); break; } @@ -260,7 +260,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( AMDGPU::MOV, MI->getOperand(0).getReg(), MI->getOperand(1).getReg()); - TII->addFlag(NewMI, 0, MO_FLAG_NEG); + TII->addFlag(*NewMI, 0, MO_FLAG_NEG); break; } @@ -268,7 +268,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( unsigned maskedRegister = MI->getOperand(0).getReg(); assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); - TII->addFlag(defInstr, 0, MO_FLAG_MASK); + TII->addFlag(*defInstr, 0, MO_FLAG_MASK); break; } @@ -294,8 +294,8 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( case AMDGPU::CONST_COPY: { MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV, MI->getOperand(0).getReg(), AMDGPU::ALU_CONST); - TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel, - MI->getOperand(1).getImm()); + TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel, + MI->getOperand(1).getImm()); break; } @@ -532,7 +532,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( .addOperand(MI->getOperand(1)) .addImm(OPCODE_IS_NOT_ZERO) .addImm(0); // Flags - TII->addFlag(NewMI, 0, MO_FLAG_PUSH); + TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND)) .addOperand(MI->getOperand(0)) .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); @@ -546,7 +546,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( .addOperand(MI->getOperand(1)) .addImm(OPCODE_IS_NOT_ZERO_INT) .addImm(0); // Flags - TII->addFlag(NewMI, 0, MO_FLAG_PUSH); + TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND)) .addOperand(MI->getOperand(0)) .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp index 85478e00522..808b4dcb778 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -152,12 +152,12 @@ bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const { return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1; } -bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const { - if (isALUInstr(MI->getOpcode())) +bool R600InstrInfo::canBeConsideredALU(const 
MachineInstr &MI) const { + if (isALUInstr(MI.getOpcode())) return true; - if (isVector(*MI) || isCubeOp(MI->getOpcode())) + if (isVector(MI) || isCubeOp(MI.getOpcode())) return true; - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { case AMDGPU::PRED_X: case AMDGPU::INTERP_PAIR_XY: case AMDGPU::INTERP_PAIR_ZW: @@ -176,16 +176,16 @@ bool R600InstrInfo::isTransOnly(unsigned Opcode) const { return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU); } -bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const { - return isTransOnly(MI->getOpcode()); +bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const { + return isTransOnly(MI.getOpcode()); } bool R600InstrInfo::isVectorOnly(unsigned Opcode) const { return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU); } -bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const { - return isVectorOnly(MI->getOpcode()); +bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const { + return isVectorOnly(MI.getOpcode()); } bool R600InstrInfo::isExport(unsigned Opcode) const { @@ -196,21 +196,21 @@ bool R600InstrInfo::usesVertexCache(unsigned Opcode) const { return ST.hasVertexCache() && IS_VTX(get(Opcode)); } -bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const { - const MachineFunction *MF = MI->getParent()->getParent(); +bool R600InstrInfo::usesVertexCache(const MachineInstr &MI) const { + const MachineFunction *MF = MI.getParent()->getParent(); return !AMDGPU::isCompute(MF->getFunction()->getCallingConv()) && - usesVertexCache(MI->getOpcode()); + usesVertexCache(MI.getOpcode()); } bool R600InstrInfo::usesTextureCache(unsigned Opcode) const { return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode)); } -bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const { - const MachineFunction *MF = MI->getParent()->getParent(); +bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const { + const MachineFunction *MF = MI.getParent()->getParent(); return (AMDGPU::isCompute(MF->getFunction()->getCallingConv()) && - usesVertexCache(MI->getOpcode())) || - usesTextureCache(MI->getOpcode()); + usesVertexCache(MI.getOpcode())) || + usesTextureCache(MI.getOpcode()); } bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { @@ -223,20 +223,21 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { } } -bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const { - return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1; +bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const { + return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1; } -bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const { - return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1; +bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const { + return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1; } -bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const { - if (!isALUInstr(MI->getOpcode())) { +bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const { + if (!isALUInstr(MI.getOpcode())) { return false; } - for (MachineInstr::const_mop_iterator I = MI->operands_begin(), - E = MI->operands_end(); I != E; ++I) { + for (MachineInstr::const_mop_iterator I = MI.operands_begin(), + E = MI.operands_end(); + I != E; ++I) { if (!I->isReg() || !I->isUse() || TargetRegisterInfo::isVirtualRegister(I->getReg())) continue; @@ -282,10 +283,10 @@ int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const { } SmallVector<std::pair<MachineOperand 
*, int64_t>, 3> -R600InstrInfo::getSrcs(MachineInstr *MI) const { +R600InstrInfo::getSrcs(MachineInstr &MI) const { SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result; - if (MI->getOpcode() == AMDGPU::DOT_4) { + if (MI.getOpcode() == AMDGPU::DOT_4) { static const unsigned OpTable[8][2] = { {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, @@ -298,12 +299,12 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const { }; for (unsigned j = 0; j < 8; j++) { - MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(), - OpTable[j][0])); + MachineOperand &MO = + MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0])); unsigned Reg = MO.getReg(); if (Reg == AMDGPU::ALU_CONST) { - MachineOperand &Sel = MI->getOperand(getOperandIdx(MI->getOpcode(), - OpTable[j][1])); + MachineOperand &Sel = + MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); Result.push_back(std::make_pair(&MO, Sel.getImm())); continue; } @@ -319,20 +320,20 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const { }; for (unsigned j = 0; j < 3; j++) { - int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); + int SrcIdx = getOperandIdx(MI.getOpcode(), OpTable[j][0]); if (SrcIdx < 0) break; - MachineOperand &MO = MI->getOperand(SrcIdx); + MachineOperand &MO = MI.getOperand(SrcIdx); unsigned Reg = MO.getReg(); if (Reg == AMDGPU::ALU_CONST) { - MachineOperand &Sel = MI->getOperand( - getOperandIdx(MI->getOpcode(), OpTable[j][1])); + MachineOperand &Sel = + MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); Result.push_back(std::make_pair(&MO, Sel.getImm())); continue; } if (Reg == AMDGPU::ALU_LITERAL_X) { - MachineOperand &Operand = MI->getOperand( - getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)); + MachineOperand &Operand = + MI.getOperand(getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal)); if (Operand.isImm()) { Result.push_back(std::make_pair(&MO, Operand.getImm())); continue; @@ -344,8 +345,8 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const { return Result; } -std::vector<std::pair<int, unsigned> > -R600InstrInfo::ExtractSrcs(MachineInstr *MI, +std::vector<std::pair<int, unsigned>> +R600InstrInfo::ExtractSrcs(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PV, unsigned &ConstCount) const { ConstCount = 0; @@ -552,7 +553,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG, unsigned ConstCount; BankSwizzle TransBS = ALU_VEC_012_SCL_210; for (unsigned i = 0, e = IG.size(); i < e; ++i) { - IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount)); + IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount)); unsigned Op = getOperandIdx(IG[i]->getOpcode(), AMDGPU::OpName::bank_swizzle); ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle) @@ -619,8 +620,8 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs) std::vector<unsigned> Consts; SmallSet<int64_t, 4> Literals; for (unsigned i = 0, n = MIs.size(); i < n; i++) { - MachineInstr *MI = MIs[i]; - if (!isALUInstr(MI->getOpcode())) + MachineInstr &MI = *MIs[i]; + if (!isALUInstr(MI.getOpcode())) continue; ArrayRef<std::pair<MachineOperand *, int64_t>> Srcs = getSrcs(MI); @@ -780,7 +781,7 @@ unsigned R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, } else { MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); assert(PredSet && "No previous predicate !"); - addFlag(PredSet, 0, MO_FLAG_PUSH); + addFlag(*PredSet, 0, MO_FLAG_PUSH); PredSet->getOperand(2).setImm(Cond[1].getImm()); BuildMI(&MBB, DL, 
get(AMDGPU::JUMP_COND)) @@ -796,7 +797,7 @@ unsigned R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, } else { MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); assert(PredSet && "No previous predicate !"); - addFlag(PredSet, 0, MO_FLAG_PUSH); + addFlag(*PredSet, 0, MO_FLAG_PUSH); PredSet->getOperand(2).setImm(Cond[1].getImm()); BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) .addMBB(TBB) @@ -828,7 +829,7 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 0; case AMDGPU::JUMP_COND: { MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); - clearFlag(predSet, 0, MO_FLAG_PUSH); + clearFlag(*predSet, 0, MO_FLAG_PUSH); I->eraseFromParent(); MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); if (CfAlu == MBB.end()) @@ -853,7 +854,7 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 1; case AMDGPU::JUMP_COND: { MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); - clearFlag(predSet, 0, MO_FLAG_PUSH); + clearFlag(*predSet, 0, MO_FLAG_PUSH); I->eraseFromParent(); MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); if (CfAlu == MBB.end()) @@ -1026,7 +1027,7 @@ unsigned int R600InstrInfo::getPredicationCost(const MachineInstr &) const { } unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, + const MachineInstr &, unsigned *PredCost) const { if (PredCost) *PredCost = 2; @@ -1039,44 +1040,43 @@ unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex, return RegIndex; } -bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { - - switch(MI->getOpcode()) { +bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + switch (MI.getOpcode()) { default: { - MachineBasicBlock *MBB = MI->getParent(); - int OffsetOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::addr); - // addr is a custom operand with multiple MI operands, and only the - // first MI operand is given a name. + MachineBasicBlock *MBB = MI.getParent(); + int OffsetOpIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr); + // addr is a custom operand with multiple MI operands, and only the + // first MI operand is given a name. 
int RegOpIdx = OffsetOpIdx + 1; - int ChanOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::chan); - if (isRegisterLoad(*MI)) { - int DstOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::dst); - unsigned RegIndex = MI->getOperand(RegOpIdx).getImm(); - unsigned Channel = MI->getOperand(ChanOpIdx).getImm(); + int ChanOpIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan); + if (isRegisterLoad(MI)) { + int DstOpIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); + unsigned RegIndex = MI.getOperand(RegOpIdx).getImm(); + unsigned Channel = MI.getOperand(ChanOpIdx).getImm(); unsigned Address = calculateIndirectAddress(RegIndex, Channel); - unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg(); + unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { - buildMovInstr(MBB, MI, MI->getOperand(DstOpIdx).getReg(), + buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(), getIndirectAddrRegClass()->getRegister(Address)); } else { - buildIndirectRead(MBB, MI, MI->getOperand(DstOpIdx).getReg(), - Address, OffsetReg); + buildIndirectRead(MBB, MI, MI.getOperand(DstOpIdx).getReg(), Address, + OffsetReg); } - } else if (isRegisterStore(*MI)) { - int ValOpIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::val); - unsigned RegIndex = MI->getOperand(RegOpIdx).getImm(); - unsigned Channel = MI->getOperand(ChanOpIdx).getImm(); + } else if (isRegisterStore(MI)) { + int ValOpIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val); + unsigned RegIndex = MI.getOperand(RegOpIdx).getImm(); + unsigned Channel = MI.getOperand(ChanOpIdx).getImm(); unsigned Address = calculateIndirectAddress(RegIndex, Channel); - unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg(); + unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address), - MI->getOperand(ValOpIdx).getReg()); + MI.getOperand(ValOpIdx).getReg()); } else { - buildIndirectWrite(MBB, MI, MI->getOperand(ValOpIdx).getReg(), + buildIndirectWrite(MBB, MI, MI.getOperand(ValOpIdx).getReg(), calculateIndirectAddress(RegIndex, Channel), OffsetReg); } @@ -1089,20 +1089,20 @@ bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { } case AMDGPU::R600_EXTRACT_ELT_V2: case AMDGPU::R600_EXTRACT_ELT_V4: - buildIndirectRead(MI->getParent(), MI, MI->getOperand(0).getReg(), - RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address - MI->getOperand(2).getReg(), - RI.getHWRegChan(MI->getOperand(1).getReg())); + buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(), + RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address + MI.getOperand(2).getReg(), + RI.getHWRegChan(MI.getOperand(1).getReg())); break; case AMDGPU::R600_INSERT_ELT_V2: case AMDGPU::R600_INSERT_ELT_V4: - buildIndirectWrite(MI->getParent(), MI, MI->getOperand(2).getReg(), // Value - RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address - MI->getOperand(3).getReg(), // Offset - RI.getHWRegChan(MI->getOperand(1).getReg())); // Channel + buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value + RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address + MI.getOperand(3).getReg(), // Offset + RI.getHWRegChan(MI.getOperand(1).getReg())); // Channel break; } - MI->eraseFromParent(); + MI.eraseFromParent(); return true; } @@ -1153,13 +1153,13 @@ MachineInstrBuilder 
R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, } MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, OffsetReg); - setImmOperand(MOVA, AMDGPU::OpName::write, 0); + setImmOperand(*MOVA, AMDGPU::OpName::write, 0); MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, AddrReg, ValueReg) .addReg(AMDGPU::AR_X, RegState::Implicit | RegState::Kill); - setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1); + setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1); return Mov; } @@ -1186,13 +1186,13 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, OffsetReg); - setImmOperand(MOVA, AMDGPU::OpName::write, 0); + setImmOperand(*MOVA, AMDGPU::OpName::write, 0); MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, ValueReg, AddrReg) .addReg(AMDGPU::AR_X, RegState::Implicit | RegState::Kill); - setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1); + setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1); return Mov; } @@ -1322,7 +1322,7 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( MachineOperand &MO = MI->getOperand( getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot))); assert (MO.isImm()); - setImmOperand(MIB, Operands[i], MO.getImm()); + setImmOperand(*MIB, Operands[i], MO.getImm()); } MIB->getOperand(20).setImm(0); return MIB; @@ -1334,7 +1334,7 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB, uint64_t Imm) const { MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg, AMDGPU::ALU_LITERAL_X); - setImmOperand(MovImm, AMDGPU::OpName::literal, Imm); + setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm); return MovImm; } @@ -1352,12 +1352,12 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const { return AMDGPU::getNamedOperandIdx(Opcode, Op); } -void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op, +void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op, int64_t Imm) const { - int Idx = getOperandIdx(*MI, Op); + int Idx = getOperandIdx(MI, Op); assert(Idx != -1 && "Operand not supported for this instruction."); - assert(MI->getOperand(Idx).isImm()); - MI->getOperand(Idx).setImm(Imm); + assert(MI.getOperand(Idx).isImm()); + MI.getOperand(Idx).setImm(Imm); } //===----------------------------------------------------------------------===// @@ -1368,9 +1368,9 @@ bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const { return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0; } -MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx, +MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx, unsigned Flag) const { - unsigned TargetFlags = get(MI->getOpcode()).TSFlags; + unsigned TargetFlags = get(MI.getOpcode()).TSFlags; int FlagIndex = 0; if (Flag != 0) { // If we pass something other than the default value of Flag to this @@ -1380,20 +1380,26 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx, bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3; switch (Flag) { case MO_FLAG_CLAMP: - FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp); + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp); break; case MO_FLAG_MASK: - FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write); + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write); break; case MO_FLAG_NOT_LAST: case MO_FLAG_LAST: - FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last); + 
FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last); break; case MO_FLAG_NEG: switch (SrcIdx) { - case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break; - case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break; - case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break; + case 0: + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg); + break; + case 1: + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg); + break; + case 2: + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg); + break; } break; @@ -1402,8 +1408,12 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx, "instructions."); (void)IsOP3; switch (SrcIdx) { - case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break; - case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break; + case 0: + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs); + break; + case 1: + FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs); + break; } break; @@ -1418,14 +1428,14 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx, "Instruction flags not supported for this instruction"); } - MachineOperand &FlagOp = MI->getOperand(FlagIndex); + MachineOperand &FlagOp = MI.getOperand(FlagIndex); assert(FlagOp.isImm()); return FlagOp; } -void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand, +void R600InstrInfo::addFlag(MachineInstr &MI, unsigned Operand, unsigned Flag) const { - unsigned TargetFlags = get(MI->getOpcode()).TSFlags; + unsigned TargetFlags = get(MI.getOpcode()).TSFlags; if (Flag == 0) { return; } @@ -1444,9 +1454,9 @@ void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand, } } -void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand, +void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand, unsigned Flag) const { - unsigned TargetFlags = get(MI->getOpcode()).TSFlags; + unsigned TargetFlags = get(MI.getOpcode()).TSFlags; if (HAS_NATIVE_OPERANDS(TargetFlags)) { MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag); FlagOp.setImm(0); diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h index 420bec89f26..1e53d872bbf 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h @@ -32,8 +32,7 @@ private: const R600Subtarget &ST; std::vector<std::pair<int, unsigned>> - ExtractSrcs(MachineInstr *MI, - const DenseMap<unsigned, unsigned> &PV, + ExtractSrcs(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PV, unsigned &ConstCount) const; MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB, @@ -83,23 +82,23 @@ public: /// \returns true if this \p Opcode represents an ALU instruction or an /// instruction that will be lowered in ExpandSpecialInstrs Pass. 
- bool canBeConsideredALU(const MachineInstr *MI) const; + bool canBeConsideredALU(const MachineInstr &MI) const; bool isTransOnly(unsigned Opcode) const; - bool isTransOnly(const MachineInstr *MI) const; + bool isTransOnly(const MachineInstr &MI) const; bool isVectorOnly(unsigned Opcode) const; - bool isVectorOnly(const MachineInstr *MI) const; + bool isVectorOnly(const MachineInstr &MI) const; bool isExport(unsigned Opcode) const; bool usesVertexCache(unsigned Opcode) const; - bool usesVertexCache(const MachineInstr *MI) const; + bool usesVertexCache(const MachineInstr &MI) const; bool usesTextureCache(unsigned Opcode) const; - bool usesTextureCache(const MachineInstr *MI) const; + bool usesTextureCache(const MachineInstr &MI) const; bool mustBeLastInClause(unsigned Opcode) const; - bool usesAddressRegister(MachineInstr *MI) const; - bool definesAddressRegister(MachineInstr *MI) const; - bool readsLDSSrcReg(const MachineInstr *MI) const; + bool usesAddressRegister(MachineInstr &MI) const; + bool definesAddressRegister(MachineInstr &MI) const; + bool readsLDSSrcReg(const MachineInstr &MI) const; /// \returns The operand index for the given source number. Legal values /// for SrcNum are 0, 1, and 2. @@ -114,7 +113,7 @@ public: /// If register is ALU_LITERAL, second member is IMM. /// Otherwise, second member value is undefined. SmallVector<std::pair<MachineOperand *, int64_t>, 3> - getSrcs(MachineInstr *MI) const; + getSrcs(MachineInstr &MI) const; unsigned isLegalUpTo( const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs, @@ -205,13 +204,13 @@ public: unsigned int getPredicationCost(const MachineInstr &) const override; unsigned int getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, + const MachineInstr &MI, unsigned *PredCost = nullptr) const override; int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const override { return 1;} - bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; + bool expandPostRAPseudo(MachineInstr &MI) const override; /// \brief Reserve the registers that may be accesed using indirect addressing. void reserveIndirectRegisters(BitVector &Reserved, @@ -286,13 +285,13 @@ public: int getOperandIdx(unsigned Opcode, unsigned Op) const; /// \brief Helper function for setting instruction flag values. - void setImmOperand(MachineInstr *MI, unsigned Op, int64_t Imm) const; + void setImmOperand(MachineInstr &MI, unsigned Op, int64_t Imm) const; /// \returns true if this instruction has an operand for storing target flags. bool hasFlagOperand(const MachineInstr &MI) const; ///\brief Add one of the MO_FLAG* flags to the specified \p Operand. - void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const; + void addFlag(MachineInstr &MI, unsigned Operand, unsigned Flag) const; ///\brief Determine if the specified \p Flag is set on this \p Operand. bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const; @@ -301,11 +300,11 @@ public: /// \param Flag The flag being set. /// /// \returns the operand containing the flags for this instruction. - MachineOperand &getFlagOp(MachineInstr *MI, unsigned SrcIdx = 0, + MachineOperand &getFlagOp(MachineInstr &MI, unsigned SrcIdx = 0, unsigned Flag = 0) const; /// \brief Clear the specified flag on the instruction. 
- void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const; + void clearFlag(MachineInstr &MI, unsigned Operand, unsigned Flag) const; // Helper functions that check the opcode for status information bool isRegisterStore(const MachineInstr &MI) const; diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp index 62318fd3781..db18e5bd1af 100644 --- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -222,7 +222,7 @@ bool R600SchedStrategy::regBelongsToClass(unsigned Reg, R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { MachineInstr *MI = SU->getInstr(); - if (TII->isTransOnly(MI)) + if (TII->isTransOnly(*MI)) return AluTrans; switch (MI->getOpcode()) { @@ -286,7 +286,7 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { return AluT_XYZW; // LDS src registers cannot be used in the Trans slot. - if (TII->readsLDSSrcReg(MI)) + if (TII->readsLDSSrcReg(*MI)) return AluT_XYZW; return AluAny; @@ -323,9 +323,8 @@ SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) { It != E; ++It) { SUnit *SU = *It; InstructionsGroupCandidate.push_back(SU->getInstr()); - if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) - && (!AnyALU || !TII->isVectorOnly(SU->getInstr())) - ) { + if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) && + (!AnyALU || !TII->isVectorOnly(*SU->getInstr()))) { InstructionsGroupCandidate.pop_back(); Q.erase((It + 1).base()); return SU; diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp index 2f16b117de8..c84866469ae 100644 --- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp @@ -94,7 +94,7 @@ private: continue; } unsigned Dst = BI->getOperand(DstIdx).getReg(); - if (isTrans || TII->isTransOnly(&*BI)) { + if (isTrans || TII->isTransOnly(*BI)) { Result[Dst] = AMDGPU::PS; continue; } @@ -207,10 +207,10 @@ public: } } - bool ARDef = TII->definesAddressRegister(MII) || - TII->definesAddressRegister(MIJ); - bool ARUse = TII->usesAddressRegister(MII) || - TII->usesAddressRegister(MIJ); + bool ARDef = + TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ); + bool ARUse = + TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ); return !ARDef || !ARUse; } @@ -230,14 +230,14 @@ public: const DenseMap<unsigned, unsigned> &PV, std::vector<R600InstrInfo::BankSwizzle> &BS, bool &isTransSlot) { - isTransSlot = TII->isTransOnly(&MI); + isTransSlot = TII->isTransOnly(MI); assert (!isTransSlot || VLIW5); // Is the dst reg sequence legal ? if (!isTransSlot && !CurrentPacketMIs.empty()) { if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) { if (ConsideredInstUsesAlreadyWrittenVectorElement && - !TII->isVectorOnly(&MI) && VLIW5) { + !TII->isVectorOnly(MI) && VLIW5) { isTransSlot = true; DEBUG({ dbgs() << "Considering as Trans Inst :"; @@ -284,7 +284,7 @@ public: } // We cannot read LDS source registrs from the Trans slot. 
- if (isTransSlot && TII->readsLDSSrcReg(&MI)) + if (isTransSlot && TII->readsLDSSrcReg(MI)) return false; CurrentPacketMIs.pop_back(); @@ -319,7 +319,7 @@ public: return It; } endPacket(MI.getParent(), MI); - if (TII->isTransOnly(&MI)) + if (TII->isTransOnly(MI)) return MI; return VLIWPacketizerList::addToPacket(MI); } @@ -378,7 +378,7 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { // instruction stream until we find the nearest boundary. MachineBasicBlock::iterator I = RegionEnd; for(;I != MBB->begin(); --I, --RemainingCount) { - if (TII->isSchedulingBoundary(&*std::prev(I), &*MBB, Fn)) + if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn)) break; } I = MBB->begin(); diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 4ba9d73e321..4ecc0fcc623 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -132,7 +132,7 @@ static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList, MachineInstr *MI, unsigned OpNo, MachineOperand *OpToFold, const SIInstrInfo *TII) { - if (!TII->isOperandLegal(MI, OpNo, OpToFold)) { + if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) { // Special case for v_mac_f32_e64 if we are trying to fold into src2 unsigned Opc = MI->getOpcode(); @@ -159,7 +159,7 @@ static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList, // see if this makes it possible to fold. unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex; unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex; - bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1); + bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1); if (CanCommute) { if (CommuteIdx0 == OpNo) @@ -177,10 +177,10 @@ static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList, return false; if (!CanCommute || - !TII->commuteInstruction(MI, false, CommuteIdx0, CommuteIdx1)) + !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1)) return false; - if (!TII->isOperandLegal(MI, OpNo, OpToFold)) + if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) return false; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ad02c4113ca..7ea3c5db8e9 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3223,7 +3223,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, if (TII->isVOP3(MI->getOpcode())) { // Make sure constant bus requirements are respected. - TII->legalizeOperandsVOP3(MRI, MI); + TII->legalizeOperandsVOP3(MRI, *MI); return; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 8c05eaf30ac..159fb72f89f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -75,12 +75,12 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); } -bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, +bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, AliasAnalysis *AA) const { // TODO: The generic check fails for VALU instructions that should be // rematerializable due to implicit reads of exec. We really want all of the // generic logic for this except for this. 
- switch (MI->getOpcode()) { + switch (MI.getOpcode()) { case AMDGPU::V_MOV_B32_e32: case AMDGPU::V_MOV_B32_e64: case AMDGPU::V_MOV_B64_PSEUDO: @@ -202,18 +202,18 @@ static bool isStride64(unsigned Opc) { } } -bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, +bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) const { - unsigned Opc = LdSt->getOpcode(); + unsigned Opc = LdSt.getOpcode(); - if (isDS(*LdSt)) { - const MachineOperand *OffsetImm = getNamedOperand(*LdSt, - AMDGPU::OpName::offset); + if (isDS(LdSt)) { + const MachineOperand *OffsetImm = + getNamedOperand(LdSt, AMDGPU::OpName::offset); if (OffsetImm) { // Normal, single offset LDS instruction. - const MachineOperand *AddrReg = getNamedOperand(*LdSt, - AMDGPU::OpName::addr); + const MachineOperand *AddrReg = + getNamedOperand(LdSt, AMDGPU::OpName::addr); BaseReg = AddrReg->getReg(); Offset = OffsetImm->getImm(); @@ -223,10 +223,10 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, // The 2 offset instructions use offset0 and offset1 instead. We can treat // these as a load with a single offset if the 2 offsets are consecutive. We // will use this for some partially aligned loads. - const MachineOperand *Offset0Imm = getNamedOperand(*LdSt, - AMDGPU::OpName::offset0); - const MachineOperand *Offset1Imm = getNamedOperand(*LdSt, - AMDGPU::OpName::offset1); + const MachineOperand *Offset0Imm = + getNamedOperand(LdSt, AMDGPU::OpName::offset0); + const MachineOperand *Offset1Imm = + getNamedOperand(LdSt, AMDGPU::OpName::offset1); uint8_t Offset0 = Offset0Imm->getImm(); uint8_t Offset1 = Offset1Imm->getImm(); @@ -236,19 +236,19 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, // to bytes of the individual reads. 
unsigned EltSize; - if (LdSt->mayLoad()) - EltSize = getOpRegClass(*LdSt, 0)->getSize() / 2; + if (LdSt.mayLoad()) + EltSize = getOpRegClass(LdSt, 0)->getSize() / 2; else { - assert(LdSt->mayStore()); + assert(LdSt.mayStore()); int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); - EltSize = getOpRegClass(*LdSt, Data0Idx)->getSize(); + EltSize = getOpRegClass(LdSt, Data0Idx)->getSize(); } if (isStride64(Opc)) EltSize *= 64; - const MachineOperand *AddrReg = getNamedOperand(*LdSt, - AMDGPU::OpName::addr); + const MachineOperand *AddrReg = + getNamedOperand(LdSt, AMDGPU::OpName::addr); BaseReg = AddrReg->getReg(); Offset = EltSize * Offset0; return true; @@ -257,37 +257,37 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, return false; } - if (isMUBUF(*LdSt) || isMTBUF(*LdSt)) { + if (isMUBUF(LdSt) || isMTBUF(LdSt)) { if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1) return false; - const MachineOperand *AddrReg = getNamedOperand(*LdSt, - AMDGPU::OpName::vaddr); + const MachineOperand *AddrReg = + getNamedOperand(LdSt, AMDGPU::OpName::vaddr); if (!AddrReg) return false; - const MachineOperand *OffsetImm = getNamedOperand(*LdSt, - AMDGPU::OpName::offset); + const MachineOperand *OffsetImm = + getNamedOperand(LdSt, AMDGPU::OpName::offset); BaseReg = AddrReg->getReg(); Offset = OffsetImm->getImm(); return true; } - if (isSMRD(*LdSt)) { - const MachineOperand *OffsetImm = getNamedOperand(*LdSt, - AMDGPU::OpName::offset); + if (isSMRD(LdSt)) { + const MachineOperand *OffsetImm = + getNamedOperand(LdSt, AMDGPU::OpName::offset); if (!OffsetImm) return false; - const MachineOperand *SBaseReg = getNamedOperand(*LdSt, - AMDGPU::OpName::sbase); + const MachineOperand *SBaseReg = + getNamedOperand(LdSt, AMDGPU::OpName::sbase); BaseReg = SBaseReg->getReg(); Offset = OffsetImm->getImm(); return true; } - if (isFLAT(*LdSt)) { - const MachineOperand *AddrReg = getNamedOperand(*LdSt, AMDGPU::OpName::addr); + if (isFLAT(LdSt)) { + const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::addr); BaseReg = AddrReg->getReg(); Offset = 0; return true; @@ -296,26 +296,26 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, return false; } -bool SIInstrInfo::shouldClusterMemOps(MachineInstr *FirstLdSt, - MachineInstr *SecondLdSt, +bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, + MachineInstr &SecondLdSt, unsigned NumLoads) const { const MachineOperand *FirstDst = nullptr; const MachineOperand *SecondDst = nullptr; - if (isDS(*FirstLdSt) && isDS(*SecondLdSt)) { - FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::vdst); - SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::vdst); + if (isDS(FirstLdSt) && isDS(SecondLdSt)) { + FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst); + SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst); } - if (isSMRD(*FirstLdSt) && isSMRD(*SecondLdSt)) { - FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::sdst); - SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::sdst); + if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) { + FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst); + SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst); } - if ((isMUBUF(*FirstLdSt) && isMUBUF(*SecondLdSt)) || - (isMTBUF(*FirstLdSt) && isMTBUF(*SecondLdSt))) { - FirstDst = getNamedOperand(*FirstLdSt, AMDGPU::OpName::vdata); - SecondDst = getNamedOperand(*SecondLdSt, AMDGPU::OpName::vdata); + if ((isMUBUF(FirstLdSt) && 
isMUBUF(SecondLdSt)) || + (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt))) { + FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata); + SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata); } if (!FirstDst || !SecondDst) @@ -332,7 +332,7 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr *FirstLdSt, unsigned LoadClusterThreshold = 16; const MachineRegisterInfo &MRI = - FirstLdSt->getParent()->getParent()->getRegInfo(); + FirstLdSt.getParent()->getParent()->getRegInfo(); const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg()); return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold; @@ -723,11 +723,9 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, } /// \param @Offset Offset in bytes of the FrameIndex being spilled -unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - RegScavenger *RS, unsigned TmpReg, - unsigned FrameOffset, - unsigned Size) const { +unsigned SIInstrInfo::calculateLDSSpillAddress( + MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg, + unsigned FrameOffset, unsigned Size) const { MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); @@ -849,23 +847,23 @@ unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const { } } -bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { - MachineBasicBlock &MBB = *MI->getParent(); +bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MBB.findDebugLoc(MI); - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); case AMDGPU::SGPR_USE: // This is just a placeholder for register allocation. - MI->eraseFromParent(); + MI.eraseFromParent(); break; case AMDGPU::V_MOV_B64_PSEUDO: { - unsigned Dst = MI->getOperand(0).getReg(); + unsigned Dst = MI.getOperand(0).getReg(); unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); - const MachineOperand &SrcOp = MI->getOperand(1); + const MachineOperand &SrcOp = MI.getOperand(1); // FIXME: Will this work for 64-bit floating point immediates? 
assert(!SrcOp.isFPImm()); if (SrcOp.isImm()) { @@ -885,17 +883,17 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1)) .addReg(Dst, RegState::Implicit | RegState::Define); } - MI->eraseFromParent(); + MI.eraseFromParent(); break; } case AMDGPU::V_CNDMASK_B64_PSEUDO: { - unsigned Dst = MI->getOperand(0).getReg(); + unsigned Dst = MI.getOperand(0).getReg(); unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); - unsigned Src0 = MI->getOperand(1).getReg(); - unsigned Src1 = MI->getOperand(2).getReg(); - const MachineOperand &SrcCond = MI->getOperand(3); + unsigned Src0 = MI.getOperand(1).getReg(); + unsigned Src1 = MI.getOperand(2).getReg(); + const MachineOperand &SrcCond = MI.getOperand(3); BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo) .addReg(RI.getSubReg(Src0, AMDGPU::sub0)) @@ -907,7 +905,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { .addReg(RI.getSubReg(Src1, AMDGPU::sub1)) .addReg(SrcCond.getReg(), getKillRegState(SrcCond.isKill())) .addReg(Dst, RegState::Implicit | RegState::Define); - MI->eraseFromParent(); + MI.eraseFromParent(); break; } @@ -915,7 +913,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(ST.getRegisterInfo()); MachineFunction &MF = *MBB.getParent(); - unsigned Reg = MI->getOperand(0).getReg(); + unsigned Reg = MI.getOperand(0).getReg(); unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0); unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1); @@ -927,15 +925,15 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { // Add 32-bit offset from this instruction to the start of the // constant data. Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo) - .addReg(RegLo) - .addOperand(MI->getOperand(1))); + .addReg(RegLo) + .addOperand(MI.getOperand(1))); Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) .addReg(RegHi) .addImm(0)); llvm::finalizeBundle(MBB, Bundler.begin()); - MI->eraseFromParent(); + MI.eraseFromParent(); break; } } @@ -949,22 +947,21 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { /// non-commutable pair of operand indices OpIdx0 and OpIdx1. /// Even though the instruction is commutable, the method may still /// fail to commute the operands, null pointer is returned in such cases. 
-MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr *MI, - bool NewMI, +MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const { - int CommutedOpcode = commuteOpcode(*MI); + int CommutedOpcode = commuteOpcode(MI); if (CommutedOpcode == -1) return nullptr; - int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::src0); - MachineOperand &Src0 = MI->getOperand(Src0Idx); + int Src0Idx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); + MachineOperand &Src0 = MI.getOperand(Src0Idx); if (!Src0.isReg()) return nullptr; - int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::src1); + int Src1Idx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1); if ((OpIdx0 != static_cast<unsigned>(Src0Idx) || OpIdx1 != static_cast<unsigned>(Src1Idx)) && @@ -972,32 +969,32 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr *MI, OpIdx1 != static_cast<unsigned>(Src0Idx))) return nullptr; - MachineOperand &Src1 = MI->getOperand(Src1Idx); + MachineOperand &Src1 = MI.getOperand(Src1Idx); - if (isVOP2(*MI) || isVOPC(*MI)) { - const MCInstrDesc &InstrDesc = MI->getDesc(); + if (isVOP2(MI) || isVOPC(MI)) { + const MCInstrDesc &InstrDesc = MI.getDesc(); // For VOP2 and VOPC instructions, any operand type is valid to use for // src0. Make sure we can use the src0 as src1. // // We could be stricter here and only allow commuting if there is a reason // to do so. i.e. if both operands are VGPRs there is no real benefit, // although MachineCSE attempts to find matches by commuting. - const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) return nullptr; } + MachineInstr *CommutedMI = &MI; if (!Src1.isReg()) { // Allow commuting instructions with Imm operands. - if (NewMI || !Src1.isImm() || - (!isVOP2(*MI) && !isVOP3(*MI))) { + if (NewMI || !Src1.isImm() || (!isVOP2(MI) && !isVOP3(MI))) { return nullptr; } // Be sure to copy the source modifiers to the right place. - if (MachineOperand *Src0Mods - = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { - MachineOperand *Src1Mods - = getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers); + if (MachineOperand *Src0Mods = + getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) { + MachineOperand *Src1Mods = + getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); int Src0ModsVal = Src0Mods->getImm(); if (!Src1Mods && Src0ModsVal != 0) @@ -1022,26 +1019,26 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr *MI, Src1.ChangeToRegister(Reg, false); Src1.setSubReg(SubReg); } else { - MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1); + CommutedMI = + TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1); } - if (MI) - MI->setDesc(get(CommutedOpcode)); + if (CommutedMI) + CommutedMI->setDesc(get(CommutedOpcode)); - return MI; + return CommutedMI; } // This needs to be implemented because the source modifiers may be inserted // between the true commutable operands, and the base // TargetInstrInfo::commuteInstruction uses it. 
-bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI, - unsigned &SrcOpIdx0, +bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const { - const MCInstrDesc &MCID = MI->getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); if (!MCID.isCommutable()) return false; - unsigned Opc = MI->getOpcode(); + unsigned Opc = MI.getOpcode(); int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); if (Src0Idx == -1) return false; @@ -1049,24 +1046,24 @@ bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI, // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on // immediate. Also, immediate src0 operand is not handled in // SIInstrInfo::commuteInstruction(); - if (!MI->getOperand(Src0Idx).isReg()) + if (!MI.getOperand(Src0Idx).isReg()) return false; int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) return false; - MachineOperand &Src1 = MI->getOperand(Src1Idx); + MachineOperand &Src1 = MI.getOperand(Src1Idx); if (Src1.isImm()) { // SIInstrInfo::commuteInstruction() does support commuting the immediate // operand src1 in 2 and 3 operand instructions. - if (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode())) + if (!isVOP2(MI.getOpcode()) && !isVOP3(MI.getOpcode())) return false; } else if (Src1.isReg()) { // If any source modifiers are set, the generic instruction commuting won't // understand how to copy the source modifiers. - if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) || - hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers)) + if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) || + hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)) return false; } else return false; @@ -1222,22 +1219,22 @@ static void removeModOperands(MachineInstr &MI) { // TODO: Maybe this should be removed this and custom fold everything in // SIFoldOperands? -bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, +bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, MachineRegisterInfo *MRI) const { if (!MRI->hasOneNonDBGUse(Reg)) return false; - unsigned Opc = UseMI->getOpcode(); + unsigned Opc = UseMI.getOpcode(); if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) { // Don't fold if we are using source modifiers. The new VOP2 instructions // don't have them. - if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) || - hasModifiersSet(*UseMI, AMDGPU::OpName::src1_modifiers) || - hasModifiersSet(*UseMI, AMDGPU::OpName::src2_modifiers)) { + if (hasModifiersSet(UseMI, AMDGPU::OpName::src0_modifiers) || + hasModifiersSet(UseMI, AMDGPU::OpName::src1_modifiers) || + hasModifiersSet(UseMI, AMDGPU::OpName::src2_modifiers)) { return false; } - const MachineOperand &ImmOp = DefMI->getOperand(1); + const MachineOperand &ImmOp = DefMI.getOperand(1); // If this is a free constant, there's no reason to do this. 
// TODO: We could fold this here instead of letting SIFoldOperands do it @@ -1245,9 +1242,9 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, if (isInlineConstant(ImmOp, 4)) return false; - MachineOperand *Src0 = getNamedOperand(*UseMI, AMDGPU::OpName::src0); - MachineOperand *Src1 = getNamedOperand(*UseMI, AMDGPU::OpName::src1); - MachineOperand *Src2 = getNamedOperand(*UseMI, AMDGPU::OpName::src2); + MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0); + MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1); + MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2); // Multiplied part is the constant: Use v_madmk_f32 // We should only expect these to be on src0 due to canonicalizations. @@ -1260,16 +1257,16 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, // We need to swap operands 0 and 1 since madmk constant is at operand 1. - const int64_t Imm = DefMI->getOperand(1).getImm(); + const int64_t Imm = DefMI.getOperand(1).getImm(); // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. // Remove these first since they are at the end. - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, - AMDGPU::OpName::omod)); - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, - AMDGPU::OpName::clamp)); + UseMI.RemoveOperand( + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); + UseMI.RemoveOperand( + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); unsigned Src1Reg = Src1->getReg(); unsigned Src1SubReg = Src1->getSubReg(); @@ -1278,18 +1275,18 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, Src0->setIsKill(Src1->isKill()); if (Opc == AMDGPU::V_MAC_F32_e64) { - UseMI->untieRegOperand( - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); + UseMI.untieRegOperand( + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); } Src1->ChangeToImmediate(Imm); - removeModOperands(*UseMI); - UseMI->setDesc(get(AMDGPU::V_MADMK_F32)); + removeModOperands(UseMI); + UseMI.setDesc(get(AMDGPU::V_MADMK_F32)); bool DeleteDef = MRI->hasOneNonDBGUse(Reg); if (DeleteDef) - DefMI->eraseFromParent(); + DefMI.eraseFromParent(); return true; } @@ -1305,32 +1302,32 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) return false; - const int64_t Imm = DefMI->getOperand(1).getImm(); + const int64_t Imm = DefMI.getOperand(1).getImm(); // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. // Remove these first since they are at the end. - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, - AMDGPU::OpName::omod)); - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, - AMDGPU::OpName::clamp)); + UseMI.RemoveOperand( + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); + UseMI.RemoveOperand( + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); if (Opc == AMDGPU::V_MAC_F32_e64) { - UseMI->untieRegOperand( - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); + UseMI.untieRegOperand( + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); } // ChangingToImmediate adds Src2 back to the instruction. Src2->ChangeToImmediate(Imm); // These come before src2. 
- removeModOperands(*UseMI); - UseMI->setDesc(get(AMDGPU::V_MADAK_F32)); + removeModOperands(UseMI); + UseMI.setDesc(get(AMDGPU::V_MADAK_F32)); bool DeleteDef = MRI->hasOneNonDBGUse(Reg); if (DeleteDef) - DefMI->eraseFromParent(); + DefMI.eraseFromParent(); return true; } @@ -1347,20 +1344,20 @@ static bool offsetsDoNotOverlap(int WidthA, int OffsetA, return LowOffset + LowWidth <= HighOffset; } -bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa, - MachineInstr *MIb) const { +bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa, + MachineInstr &MIb) const { unsigned BaseReg0, BaseReg1; int64_t Offset0, Offset1; if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) && getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) { - if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) { + if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) { // FIXME: Handle ds_read2 / ds_write2. return false; } - unsigned Width0 = (*MIa->memoperands_begin())->getSize(); - unsigned Width1 = (*MIb->memoperands_begin())->getSize(); + unsigned Width0 = (*MIa.memoperands_begin())->getSize(); + unsigned Width1 = (*MIb.memoperands_begin())->getSize(); if (BaseReg0 == BaseReg1 && offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) { return true; @@ -1370,19 +1367,19 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa, return false; } -bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, - MachineInstr *MIb, +bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa, + MachineInstr &MIb, AliasAnalysis *AA) const { - assert(MIa && (MIa->mayLoad() || MIa->mayStore()) && + assert((MIa.mayLoad() || MIa.mayStore()) && "MIa must load from or modify a memory location"); - assert(MIb && (MIb->mayLoad() || MIb->mayStore()) && + assert((MIb.mayLoad() || MIb.mayStore()) && "MIb must load from or modify a memory location"); - if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects()) + if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects()) return false; // XXX - Can we relax this between address spaces? - if (MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef()) + if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) return false; // TODO: Should we check the address space from the MachineMemOperand? That @@ -1390,29 +1387,29 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, // underlying address space, even if it was lowered to a different one, // e.g. private accesses lowered to use MUBUF instructions on a scratch // buffer. 
- if (isDS(*MIa)) { - if (isDS(*MIb)) + if (isDS(MIa)) { + if (isDS(MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); - return !isFLAT(*MIb); + return !isFLAT(MIb); } - if (isMUBUF(*MIa) || isMTBUF(*MIa)) { - if (isMUBUF(*MIb) || isMTBUF(*MIb)) + if (isMUBUF(MIa) || isMTBUF(MIa)) { + if (isMUBUF(MIb) || isMTBUF(MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); - return !isFLAT(*MIb) && !isSMRD(*MIb); + return !isFLAT(MIb) && !isSMRD(MIb); } - if (isSMRD(*MIa)) { - if (isSMRD(*MIb)) + if (isSMRD(MIa)) { + if (isSMRD(MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); - return !isFLAT(*MIb) && !isMUBUF(*MIa) && !isMTBUF(*MIa); + return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(MIa); } - if (isFLAT(*MIa)) { - if (isFLAT(*MIb)) + if (isFLAT(MIa)) { + if (isFLAT(MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); return false; @@ -1422,45 +1419,47 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, } MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, - MachineBasicBlock::iterator &MI, - LiveVariables *LV) const { - - switch (MI->getOpcode()) { - default: return nullptr; - case AMDGPU::V_MAC_F32_e64: break; - case AMDGPU::V_MAC_F32_e32: { - const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0); - if (Src0->isImm() && !isInlineConstant(*Src0, 4)) - return nullptr; - break; - } + MachineInstr &MI, + LiveVariables *LV) const { + + switch (MI.getOpcode()) { + default: + return nullptr; + case AMDGPU::V_MAC_F32_e64: + break; + case AMDGPU::V_MAC_F32_e32: { + const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0); + if (Src0->isImm() && !isInlineConstant(*Src0, 4)) + return nullptr; + break; + } } - const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::vdst); - const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0); - const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1); - const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2); + const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); + const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0); + const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); + const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); - return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32)) - .addOperand(*Dst) - .addImm(0) // Src0 mods - .addOperand(*Src0) - .addImm(0) // Src1 mods - .addOperand(*Src1) - .addImm(0) // Src mods - .addOperand(*Src2) - .addImm(0) // clamp - .addImm(0); // omod + return BuildMI(*MBB, MI, MI.getDebugLoc(), get(AMDGPU::V_MAD_F32)) + .addOperand(*Dst) + .addImm(0) // Src0 mods + .addOperand(*Src0) + .addImm(0) // Src1 mods + .addOperand(*Src1) + .addImm(0) // Src mods + .addOperand(*Src2) + .addImm(0) // clamp + .addImm(0); // omod } -bool SIInstrInfo::isSchedulingBoundary(const MachineInstr *MI, +bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const { // Target-independent instructions do not have an implicit-use of EXEC, even // when they operate on VGPRs. Treating EXEC modifications as scheduling // boundaries prevents incorrect movements of such instructions. 
const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo(); - if (MI->modifiesRegister(AMDGPU::EXEC, TRI)) + if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) return true; return AMDGPUInstrInfo::isSchedulingBoundary(MI, MBB, MF); @@ -1541,9 +1540,9 @@ static bool compareMachineOp(const MachineOperand &Op0, } } -bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, - const MachineOperand &MO) const { - const MCOperandInfo &OpInfo = get(MI->getOpcode()).OpInfo[OpNo]; +bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, + const MachineOperand &MO) const { + const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo]; assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); @@ -1653,10 +1652,10 @@ static bool shouldReadExec(const MachineInstr &MI) { return true; } -bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, +bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const { - uint16_t Opcode = MI->getOpcode(); - const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + uint16_t Opcode = MI.getOpcode(); + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); @@ -1664,14 +1663,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, // Make sure the number of operands is correct. const MCInstrDesc &Desc = get(Opcode); if (!Desc.isVariadic() && - Desc.getNumOperands() != MI->getNumExplicitOperands()) { - ErrInfo = "Instruction has wrong number of operands."; - return false; + Desc.getNumOperands() != MI.getNumExplicitOperands()) { + ErrInfo = "Instruction has wrong number of operands."; + return false; } // Make sure the register classes are correct. for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { - if (MI->getOperand(i).isFPImm()) { + if (MI.getOperand(i).isFPImm()) { ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast " "all fp values to integers."; return false; @@ -1681,7 +1680,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, switch (Desc.OpInfo[i].OperandType) { case MCOI::OPERAND_REGISTER: - if (MI->getOperand(i).isImm()) { + if (MI.getOperand(i).isImm()) { ErrInfo = "Illegal immediate value for operand."; return false; } @@ -1689,7 +1688,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, case AMDGPU::OPERAND_REG_IMM32: break; case AMDGPU::OPERAND_REG_INLINE_C: - if (isLiteralConstant(MI->getOperand(i), + if (isLiteralConstant(MI.getOperand(i), RI.getRegClass(RegClass)->getSize())) { ErrInfo = "Illegal immediate value for operand."; return false; @@ -1699,7 +1698,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, // Check if this operand is an immediate. // FrameIndex operands will be replaced by immediates, so they are // allowed. 
- if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFI()) { + if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) { ErrInfo = "Expected immediate, but got non-immediate"; return false; } @@ -1708,11 +1707,11 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, continue; } - if (!MI->getOperand(i).isReg()) + if (!MI.getOperand(i).isReg()) continue; if (RegClass != -1) { - unsigned Reg = MI->getOperand(i).getReg(); + unsigned Reg = MI.getOperand(i).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; @@ -1725,21 +1724,21 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, } // Verify VOP* - if (isVOP1(*MI) || isVOP2(*MI) || isVOP3(*MI) || isVOPC(*MI)) { + if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI)) { // Only look at the true operands. Only a real operand can use the constant // bus, and we don't want to check pseudo-operands like the source modifier // flags. const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; unsigned ConstantBusCount = 0; - unsigned SGPRUsed = findImplicitSGPRRead(*MI); + unsigned SGPRUsed = findImplicitSGPRRead(MI); if (SGPRUsed != AMDGPU::NoRegister) ++ConstantBusCount; for (int OpIdx : OpIndices) { if (OpIdx == -1) break; - const MachineOperand &MO = MI->getOperand(OpIdx); + const MachineOperand &MO = MI.getOperand(OpIdx); if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) { if (MO.isReg()) { if (MO.getReg() != SGPRUsed) @@ -1759,9 +1758,9 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, // Verify misc. restrictions on specific instructions. if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { - const MachineOperand &Src0 = MI->getOperand(Src0Idx); - const MachineOperand &Src1 = MI->getOperand(Src1Idx); - const MachineOperand &Src2 = MI->getOperand(Src2Idx); + const MachineOperand &Src0 = MI.getOperand(Src0Idx); + const MachineOperand &Src1 = MI.getOperand(Src1Idx); + const MachineOperand &Src2 = MI.getOperand(Src2Idx); if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { if (!compareMachineOp(Src0, Src1) && !compareMachineOp(Src0, Src2)) { @@ -1773,8 +1772,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, // Make sure we aren't losing exec uses in the td files. This mostly requires // being careful when using let Uses to try to add other use registers. 
- if (shouldReadExec(*MI)) { - if (!MI->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { + if (shouldReadExec(MI)) { + if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { ErrInfo = "VALU instruction does not implicitly read exec mask"; return false; } @@ -1875,12 +1874,12 @@ bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const { } } -void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const { +void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { MachineBasicBlock::iterator I = MI; - MachineBasicBlock *MBB = MI->getParent(); - MachineOperand &MO = MI->getOperand(OpIdx); + MachineBasicBlock *MBB = MI.getParent(); + MachineOperand &MO = MI.getOperand(OpIdx); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass; + unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass; const TargetRegisterClass *RC = RI.getRegClass(RCID); unsigned Opcode = AMDGPU::V_MOV_B32_e32; if (MO.isReg()) @@ -1896,8 +1895,7 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const { unsigned Reg = MRI.createVirtualRegister(VRC); DebugLoc DL = MBB->findDebugLoc(I); - BuildMI(*MI->getParent(), I, DL, get(Opcode), Reg) - .addOperand(MO); + BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).addOperand(MO); MO.ChangeToRegister(Reg, false); } @@ -1956,11 +1954,11 @@ MachineOperand SIInstrInfo::buildExtractSubRegOrImm( } // Change the order of operands from (0, 1, 2) to (0, 2, 1) -void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const { - assert(Inst->getNumExplicitOperands() == 3); - MachineOperand Op1 = Inst->getOperand(1); - Inst->RemoveOperand(1); - Inst->addOperand(Op1); +void SIInstrInfo::swapOperands(MachineInstr &Inst) const { + assert(Inst.getNumExplicitOperands() == 3); + MachineOperand Op1 = Inst.getOperand(1); + Inst.RemoveOperand(1); + Inst.addOperand(Op1); } bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, @@ -2002,30 +2000,29 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, return true; } -bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, +bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO) const { - const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); - const MCInstrDesc &InstDesc = MI->getDesc(); + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + const MCInstrDesc &InstDesc = MI.getDesc(); const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; const TargetRegisterClass *DefinedRC = OpInfo.RegClass != -1 ? 
RI.getRegClass(OpInfo.RegClass) : nullptr; if (!MO) - MO = &MI->getOperand(OpIdx); + MO = &MI.getOperand(OpIdx); - if (isVALU(*MI) && - usesConstantBus(MRI, *MO, DefinedRC->getSize())) { + if (isVALU(MI) && usesConstantBus(MRI, *MO, DefinedRC->getSize())) { RegSubRegPair SGPRUsed; if (MO->isReg()) SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { if (i == OpIdx) continue; - const MachineOperand &Op = MI->getOperand(i); + const MachineOperand &Op = MI.getOperand(i); if (Op.isReg() && (Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) && - usesConstantBus(MRI, Op, getOpSize(*MI, i))) { + usesConstantBus(MRI, Op, getOpSize(MI, i))) { return false; } } @@ -2048,12 +2045,12 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, } void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, - MachineInstr *MI) const { - unsigned Opc = MI->getOpcode(); + MachineInstr &MI) const { + unsigned Opc = MI.getOpcode(); const MCInstrDesc &InstrDesc = get(Opc); int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); - MachineOperand &Src1 = MI->getOperand(Src1Idx); + MachineOperand &Src1 = MI.getOperand(Src1Idx); // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32 // we need to only have one constant bus use. @@ -2061,10 +2058,10 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, // Note we do not need to worry about literal constants here. They are // disabled for the operand type for instructions because they will always // violate the one constant bus use rule. - bool HasImplicitSGPR = findImplicitSGPRRead(*MI) != AMDGPU::NoRegister; + bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister; if (HasImplicitSGPR) { int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); - MachineOperand &Src0 = MI->getOperand(Src0Idx); + MachineOperand &Src0 = MI.getOperand(Src0Idx); if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) legalizeOpWithMove(MI, Src0Idx); @@ -2079,13 +2076,13 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, // commute if it is possible. We only want to commute here if it improves // legality. This can be called a fairly large number of times so don't waste // compile time pointlessly swapping and checking legality again. - if (HasImplicitSGPR || !MI->isCommutable()) { + if (HasImplicitSGPR || !MI.isCommutable()) { legalizeOpWithMove(MI, Src1Idx); return; } int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); - MachineOperand &Src0 = MI->getOperand(Src0Idx); + MachineOperand &Src0 = MI.getOperand(Src0Idx); // If src0 can be used as src1, commuting will make the operands legal. // Otherwise we have to give up and insert a move. @@ -2098,13 +2095,13 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, return; } - int CommutedOpc = commuteOpcode(*MI); + int CommutedOpc = commuteOpcode(MI); if (CommutedOpc == -1) { legalizeOpWithMove(MI, Src1Idx); return; } - MI->setDesc(get(CommutedOpc)); + MI.setDesc(get(CommutedOpc)); unsigned Src0Reg = Src0.getReg(); unsigned Src0SubReg = Src0.getSubReg(); @@ -2126,10 +2123,9 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, // operand, and since literal constants are not allowed and should never be // seen, we only need to worry about inserting copies if we use multiple SGPR // operands. 
-void SIInstrInfo::legalizeOperandsVOP3( - MachineRegisterInfo &MRI, - MachineInstr *MI) const { - unsigned Opc = MI->getOpcode(); +void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI, + MachineInstr &MI) const { + unsigned Opc = MI.getOpcode(); int VOP3Idx[3] = { AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0), @@ -2144,7 +2140,7 @@ void SIInstrInfo::legalizeOperandsVOP3( int Idx = VOP3Idx[i]; if (Idx == -1) break; - MachineOperand &MO = MI->getOperand(Idx); + MachineOperand &MO = MI.getOperand(Idx); // We should never see a VOP3 instruction with an illegal immediate operand. if (!MO.isReg()) @@ -2165,8 +2161,8 @@ void SIInstrInfo::legalizeOperandsVOP3( } } -unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr *UseMI, - MachineRegisterInfo &MRI) const { +unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, + MachineRegisterInfo &MRI) const { const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg); const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC); unsigned DstReg = MRI.createVirtualRegister(SRC); @@ -2175,15 +2171,15 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr *UseMI, SmallVector<unsigned, 8> SRegs; for (unsigned i = 0; i < SubRegs; ++i) { unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(), + BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(AMDGPU::V_READFIRSTLANE_B32), SGPR) - .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); + .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); SRegs.push_back(SGPR); } - MachineInstrBuilder MIB = BuildMI(*UseMI->getParent(), UseMI, - UseMI->getDebugLoc(), - get(AMDGPU::REG_SEQUENCE), DstReg); + MachineInstrBuilder MIB = + BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), + get(AMDGPU::REG_SEQUENCE), DstReg); for (unsigned i = 0; i < SubRegs; ++i) { MIB.addReg(SRegs[i]); MIB.addImm(RI.getSubRegFromChannel(i)); @@ -2192,36 +2188,36 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr *UseMI, } void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI, - MachineInstr *MI) const { + MachineInstr &MI) const { // If the pointer is store in VGPRs, then we need to move them to // SGPRs using v_readfirstlane. This is safe because we only select // loads with uniform pointers to SMRD instruction so we know the // pointer value is uniform. - MachineOperand *SBase = getNamedOperand(*MI, AMDGPU::OpName::sbase); + MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase); if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) { unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI); SBase->setReg(SGPR); } } -void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { - MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); +void SIInstrInfo::legalizeOperands(MachineInstr &MI) const { + MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); // Legalize VOP2 - if (isVOP2(*MI) || isVOPC(*MI)) { + if (isVOP2(MI) || isVOPC(MI)) { legalizeOperandsVOP2(MRI, MI); return; } // Legalize VOP3 - if (isVOP3(*MI)) { + if (isVOP3(MI)) { legalizeOperandsVOP3(MRI, MI); return; } // Legalize SMRD - if (isSMRD(*MI)) { + if (isSMRD(MI)) { legalizeOperandsSMRD(MRI, MI); return; } @@ -2229,14 +2225,14 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { // Legalize REG_SEQUENCE and PHI // The register class of the operands much be the same type as the register // class of the output. 
- if (MI->getOpcode() == AMDGPU::PHI) { + if (MI.getOpcode() == AMDGPU::PHI) { const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr; - for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) { - if (!MI->getOperand(i).isReg() || - !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg())) + for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { + if (!MI.getOperand(i).isReg() || + !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) continue; const TargetRegisterClass *OpRC = - MRI.getRegClass(MI->getOperand(i).getReg()); + MRI.getRegClass(MI.getOperand(i).getReg()); if (RI.hasVGPRs(OpRC)) { VRC = OpRC; } else { @@ -2247,7 +2243,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { // If any of the operands are VGPR registers, then they all most be // otherwise we will create illegal VGPR->SGPR copies when legalizing // them. - if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) { + if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) { if (!VRC) { assert(SRC); VRC = RI.getEquivalentVGPRClass(SRC); @@ -2258,18 +2254,18 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { } // Update all the operands so they have the same type. - for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { - MachineOperand &Op = MI->getOperand(I); + for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { + MachineOperand &Op = MI.getOperand(I); if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) continue; unsigned DstReg = MRI.createVirtualRegister(RC); // MI is a PHI instruction. - MachineBasicBlock *InsertBB = MI->getOperand(I + 1).getMBB(); + MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB(); MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator(); - BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg) - .addOperand(Op); + BuildMI(*InsertBB, Insert, MI.getDebugLoc(), get(AMDGPU::COPY), DstReg) + .addOperand(Op); Op.setReg(DstReg); } } @@ -2277,15 +2273,15 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { // REG_SEQUENCE doesn't really require operand legalization, but if one has a // VGPR dest type and SGPR sources, insert copies so all operands are // VGPRs. This seems to help operand folding / the register coalescer. - if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) { - MachineBasicBlock *MBB = MI->getParent(); - const TargetRegisterClass *DstRC = getOpRegClass(*MI, 0); + if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) { + MachineBasicBlock *MBB = MI.getParent(); + const TargetRegisterClass *DstRC = getOpRegClass(MI, 0); if (RI.hasVGPRs(DstRC)) { // Update all the operands so they are VGPR register classes. These may // not be the same register class because REG_SEQUENCE supports mixing // subregister index types e.g. 
sub0_sub1 + sub2 + sub3 - for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { - MachineOperand &Op = MI->getOperand(I); + for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { + MachineOperand &Op = MI.getOperand(I); if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) continue; @@ -2296,8 +2292,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { unsigned DstReg = MRI.createVirtualRegister(VRC); - BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg) - .addOperand(Op); + BuildMI(*MBB, MI, MI.getDebugLoc(), get(AMDGPU::COPY), DstReg) + .addOperand(Op); Op.setReg(DstReg); Op.setIsKill(); @@ -2309,30 +2305,30 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { // Legalize INSERT_SUBREG // src0 must have the same register class as dst - if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) { - unsigned Dst = MI->getOperand(0).getReg(); - unsigned Src0 = MI->getOperand(1).getReg(); + if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) { + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Src0 = MI.getOperand(1).getReg(); const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0); if (DstRC != Src0RC) { - MachineBasicBlock &MBB = *MI->getParent(); + MachineBasicBlock &MBB = *MI.getParent(); unsigned NewSrc0 = MRI.createVirtualRegister(DstRC); - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0) - .addReg(Src0); - MI->getOperand(1).setReg(NewSrc0); + BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::COPY), NewSrc0) + .addReg(Src0); + MI.getOperand(1).setReg(NewSrc0); } return; } // Legalize MIMG - if (isMIMG(*MI)) { - MachineOperand *SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc); + if (isMIMG(MI)) { + MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc); if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) { unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI); SRsrc->setReg(SGPR); } - MachineOperand *SSamp = getNamedOperand(*MI, AMDGPU::OpName::ssamp); + MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp); if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) { unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI); SSamp->setReg(SGPR); @@ -2344,11 +2340,11 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { // FIXME: If we start using the non-addr64 instructions for compute, we // may need to legalize them here. int SRsrcIdx = - AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc); + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); if (SRsrcIdx != -1) { // We have an MUBUF instruction - MachineOperand *SRsrc = &MI->getOperand(SRsrcIdx); - unsigned SRsrcRC = get(MI->getOpcode()).OpInfo[SRsrcIdx].RegClass; + MachineOperand *SRsrc = &MI.getOperand(SRsrcIdx); + unsigned SRsrcRC = get(MI.getOpcode()).OpInfo[SRsrcIdx].RegClass; if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()), RI.getRegClass(SRsrcRC))) { // The operands are legal. @@ -2356,7 +2352,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { return; } - MachineBasicBlock &MBB = *MI->getParent(); + MachineBasicBlock &MBB = *MI.getParent(); // Extract the ptr from the resource descriptor. 
unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc, @@ -2370,30 +2366,27 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { uint64_t RsrcDataFormat = getDefaultRsrcDataFormat(); // Zero64 = 0 - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64), - Zero64) - .addImm(0); + BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B64), Zero64) + .addImm(0); // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0} - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), - SRsrcFormatLo) - .addImm(RsrcDataFormat & 0xFFFFFFFF); + BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo) + .addImm(RsrcDataFormat & 0xFFFFFFFF); // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32} - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), - SRsrcFormatHi) - .addImm(RsrcDataFormat >> 32); + BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi) + .addImm(RsrcDataFormat >> 32); // NewSRsrc = {Zero64, SRsrcFormat} - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc) - .addReg(Zero64) - .addImm(AMDGPU::sub0_sub1) - .addReg(SRsrcFormatLo) - .addImm(AMDGPU::sub2) - .addReg(SRsrcFormatHi) - .addImm(AMDGPU::sub3); - - MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr); + BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc) + .addReg(Zero64) + .addImm(AMDGPU::sub0_sub1) + .addReg(SRsrcFormatLo) + .addImm(AMDGPU::sub2) + .addReg(SRsrcFormatHi) + .addImm(AMDGPU::sub3); + + MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr); unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); if (VAddr) { // This is already an ADDR64 instruction so we need to add the pointer @@ -2402,7 +2395,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0 - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI.getDebugLoc(); BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo) .addReg(SRsrcPtr, 0, AMDGPU::sub0) .addReg(VAddr->getReg(), 0, AMDGPU::sub0); @@ -2413,11 +2406,11 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { .addReg(VAddr->getReg(), 0, AMDGPU::sub1); // NewVaddr = {NewVaddrHi, NewVaddrLo} - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) - .addReg(NewVAddrLo) - .addImm(AMDGPU::sub0) - .addReg(NewVAddrHi) - .addImm(AMDGPU::sub1); + BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) + .addReg(NewVAddrLo) + .addImm(AMDGPU::sub0) + .addReg(NewVAddrHi) + .addImm(AMDGPU::sub1); } else { // This instructions is the _OFFSET variant, so we need to convert it to // ADDR64. @@ -2425,70 +2418,70 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { < SISubtarget::VOLCANIC_ISLANDS && "FIXME: Need to emit flat atomics here"); - MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata); - MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset); - MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset); - unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode()); + MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata); + MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); + MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset); + unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode()); // Atomics rith return have have an additional tied operand and are // missing some of the special bits. 
- MachineOperand *VDataIn = getNamedOperand(*MI, AMDGPU::OpName::vdata_in); + MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in); MachineInstr *Addr64; if (!VDataIn) { // Regular buffer load / store. - MachineInstrBuilder MIB - = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) - .addOperand(*VData) - .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. - // This will be replaced later - // with the new value of vaddr. - .addOperand(*SRsrc) - .addOperand(*SOffset) - .addOperand(*Offset); + MachineInstrBuilder MIB = + BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) + .addOperand(*VData) + .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. + // This will be replaced later + // with the new value of vaddr. + .addOperand(*SRsrc) + .addOperand(*SOffset) + .addOperand(*Offset); // Atomics do not have this operand. - if (const MachineOperand *GLC - = getNamedOperand(*MI, AMDGPU::OpName::glc)) { + if (const MachineOperand *GLC = + getNamedOperand(MI, AMDGPU::OpName::glc)) { MIB.addImm(GLC->getImm()); } - MIB.addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc)); + MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc)); - if (const MachineOperand *TFE - = getNamedOperand(*MI, AMDGPU::OpName::tfe)) { + if (const MachineOperand *TFE = + getNamedOperand(MI, AMDGPU::OpName::tfe)) { MIB.addImm(TFE->getImm()); } - MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); Addr64 = MIB; } else { // Atomics with return. - Addr64 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) - .addOperand(*VData) - .addOperand(*VDataIn) - .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. - // This will be replaced later - // with the new value of vaddr. - .addOperand(*SRsrc) - .addOperand(*SOffset) - .addOperand(*Offset) - .addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc)) - .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) + .addOperand(*VData) + .addOperand(*VDataIn) + .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. + // This will be replaced later + // with the new value of vaddr. 
+ .addOperand(*SRsrc) + .addOperand(*SOffset) + .addOperand(*Offset) + .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc)) + .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); } - MI->removeFromParent(); - MI = Addr64; + MI.removeFromParent(); // NewVaddr = {NewVaddrHi, NewVaddrLo} - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) - .addReg(SRsrcPtr, 0, AMDGPU::sub0) - .addImm(AMDGPU::sub0) - .addReg(SRsrcPtr, 0, AMDGPU::sub1) - .addImm(AMDGPU::sub1); - - VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr); - SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc); + BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), + NewVAddr) + .addReg(SRsrcPtr, 0, AMDGPU::sub0) + .addImm(AMDGPU::sub0) + .addReg(SRsrcPtr, 0, AMDGPU::sub1) + .addImm(AMDGPU::sub1); + + VAddr = getNamedOperand(*Addr64, AMDGPU::OpName::vaddr); + SRsrc = getNamedOperand(*Addr64, AMDGPU::OpName::srsrc); } // Update the instruction to use NewVaddr @@ -2503,12 +2496,12 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { Worklist.push_back(&TopInst); while (!Worklist.empty()) { - MachineInstr *Inst = Worklist.pop_back_val(); - MachineBasicBlock *MBB = Inst->getParent(); + MachineInstr &Inst = *Worklist.pop_back_val(); + MachineBasicBlock *MBB = Inst.getParent(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - unsigned Opcode = Inst->getOpcode(); - unsigned NewOpcode = getVALUOp(*Inst); + unsigned Opcode = Inst.getOpcode(); + unsigned NewOpcode = getVALUOp(Inst); // Handle some special cases switch (Opcode) { @@ -2516,32 +2509,32 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { break; case AMDGPU::S_AND_B64: splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64); - Inst->eraseFromParent(); + Inst.eraseFromParent(); continue; case AMDGPU::S_OR_B64: splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64); - Inst->eraseFromParent(); + Inst.eraseFromParent(); continue; case AMDGPU::S_XOR_B64: splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64); - Inst->eraseFromParent(); + Inst.eraseFromParent(); continue; case AMDGPU::S_NOT_B64: splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32); - Inst->eraseFromParent(); + Inst.eraseFromParent(); continue; case AMDGPU::S_BCNT1_I32_B64: splitScalar64BitBCNT(Worklist, Inst); - Inst->eraseFromParent(); + Inst.eraseFromParent(); continue; case AMDGPU::S_BFE_I64: { splitScalar64BitBFE(Worklist, Inst); - Inst->eraseFromParent(); + Inst.eraseFromParent(); continue; } @@ -2584,15 +2577,16 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { case AMDGPU::S_ABS_I32: lowerScalarAbs(Worklist, Inst); - Inst->eraseFromParent(); + Inst.eraseFromParent(); continue; case AMDGPU::S_CBRANCH_SCC0: case AMDGPU::S_CBRANCH_SCC1: // Clear unused bits of vcc - BuildMI(*MBB, Inst, Inst->getDebugLoc(), get(AMDGPU::S_AND_B64), AMDGPU::VCC) - .addReg(AMDGPU::EXEC) - .addReg(AMDGPU::VCC); + BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64), + AMDGPU::VCC) + .addReg(AMDGPU::EXEC) + .addReg(AMDGPU::VCC); break; case AMDGPU::S_BFE_U64: @@ -2609,15 +2603,15 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { // Use the new VALU Opcode. const MCInstrDesc &NewDesc = get(NewOpcode); - Inst->setDesc(NewDesc); + Inst.setDesc(NewDesc); // Remove any references to SCC. Vector instructions can't read from it, and // We're just about to add the implicit use / defs of VCC, and we don't want // both. 
- for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) { - MachineOperand &Op = Inst->getOperand(i); + for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) { + MachineOperand &Op = Inst.getOperand(i); if (Op.isReg() && Op.getReg() == AMDGPU::SCC) { - Inst->RemoveOperand(i); + Inst.RemoveOperand(i); addSCCDefUsersToVALUWorklist(Inst, Worklist); } } @@ -2626,19 +2620,19 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { // We are converting these to a BFE, so we need to add the missing // operands for the size and offset. unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; - Inst->addOperand(MachineOperand::CreateImm(0)); - Inst->addOperand(MachineOperand::CreateImm(Size)); + Inst.addOperand(MachineOperand::CreateImm(0)); + Inst.addOperand(MachineOperand::CreateImm(Size)); } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { // The VALU version adds the second operand to the result, so insert an // extra 0 operand. - Inst->addOperand(MachineOperand::CreateImm(0)); + Inst.addOperand(MachineOperand::CreateImm(0)); } - Inst->addImplicitDefUseOperands(*Inst->getParent()->getParent()); + Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent()); if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { - const MachineOperand &OffsetWidthOp = Inst->getOperand(2); + const MachineOperand &OffsetWidthOp = Inst.getOperand(2); // If we need to move this to VGPRs, we need to unpack the second operand // back into the 2 separate ones for bit offset and width. assert(OffsetWidthOp.isImm() && @@ -2647,20 +2641,20 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. - Inst->RemoveOperand(2); // Remove old immediate. - Inst->addOperand(MachineOperand::CreateImm(Offset)); - Inst->addOperand(MachineOperand::CreateImm(BitWidth)); + Inst.RemoveOperand(2); // Remove old immediate. + Inst.addOperand(MachineOperand::CreateImm(Offset)); + Inst.addOperand(MachineOperand::CreateImm(BitWidth)); } - bool HasDst = Inst->getOperand(0).isReg() && Inst->getOperand(0).isDef(); + bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef(); unsigned NewDstReg = AMDGPU::NoRegister; if (HasDst) { // Update the destination register class. 
- const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*Inst); + const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); if (!NewDstRC) continue; - unsigned DstReg = Inst->getOperand(0).getReg(); + unsigned DstReg = Inst.getOperand(0).getReg(); NewDstReg = MRI.createVirtualRegister(NewDstRC); MRI.replaceRegWith(DstReg, NewDstReg); } @@ -2682,14 +2676,14 @@ const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { } void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist, - MachineInstr *Inst) const { - MachineBasicBlock &MBB = *Inst->getParent(); + MachineInstr &Inst) const { + MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineBasicBlock::iterator MII = Inst; - DebugLoc DL = Inst->getDebugLoc(); + DebugLoc DL = Inst.getDebugLoc(); - MachineOperand &Dest = Inst->getOperand(0); - MachineOperand &Src = Inst->getOperand(1); + MachineOperand &Dest = Inst.getOperand(0); + MachineOperand &Src = Inst.getOperand(1); unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); @@ -2706,15 +2700,14 @@ void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist, } void SIInstrInfo::splitScalar64BitUnaryOp( - SmallVectorImpl<MachineInstr *> &Worklist, - MachineInstr *Inst, - unsigned Opcode) const { - MachineBasicBlock &MBB = *Inst->getParent(); + SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst, + unsigned Opcode) const { + MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - MachineOperand &Dest = Inst->getOperand(0); - MachineOperand &Src0 = Inst->getOperand(1); - DebugLoc DL = Inst->getDebugLoc(); + MachineOperand &Dest = Inst.getOperand(0); + MachineOperand &Src0 = Inst.getOperand(1); + DebugLoc DL = Inst.getDebugLoc(); MachineBasicBlock::iterator MII = Inst; @@ -2760,16 +2753,15 @@ void SIInstrInfo::splitScalar64BitUnaryOp( } void SIInstrInfo::splitScalar64BitBinaryOp( - SmallVectorImpl<MachineInstr *> &Worklist, - MachineInstr *Inst, - unsigned Opcode) const { - MachineBasicBlock &MBB = *Inst->getParent(); + SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst, + unsigned Opcode) const { + MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - MachineOperand &Dest = Inst->getOperand(0); - MachineOperand &Src0 = Inst->getOperand(1); - MachineOperand &Src1 = Inst->getOperand(2); - DebugLoc DL = Inst->getDebugLoc(); + MachineOperand &Dest = Inst.getOperand(0); + MachineOperand &Src0 = Inst.getOperand(1); + MachineOperand &Src1 = Inst.getOperand(2); + DebugLoc DL = Inst.getDebugLoc(); MachineBasicBlock::iterator MII = Inst; @@ -2795,9 +2787,9 @@ void SIInstrInfo::splitScalar64BitBinaryOp( const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); - MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0) - .addOperand(SrcReg0Sub0) - .addOperand(SrcReg1Sub0); + MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0) + .addOperand(SrcReg0Sub0) + .addOperand(SrcReg1Sub0); MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); @@ -2805,9 +2797,9 @@ void SIInstrInfo::splitScalar64BitBinaryOp( AMDGPU::sub1, Src1SubRC); unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); - MachineInstr *HiHalf = BuildMI(MBB, 
MII, DL, InstDesc, DestSub1) - .addOperand(SrcReg0Sub1) - .addOperand(SrcReg1Sub1); + MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1) + .addOperand(SrcReg0Sub1) + .addOperand(SrcReg1Sub1); unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) @@ -2827,16 +2819,16 @@ void SIInstrInfo::splitScalar64BitBinaryOp( addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); } -void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist, - MachineInstr *Inst) const { - MachineBasicBlock &MBB = *Inst->getParent(); +void SIInstrInfo::splitScalar64BitBCNT( + SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst) const { + MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineBasicBlock::iterator MII = Inst; - DebugLoc DL = Inst->getDebugLoc(); + DebugLoc DL = Inst.getDebugLoc(); - MachineOperand &Dest = Inst->getOperand(0); - MachineOperand &Src = Inst->getOperand(1); + MachineOperand &Dest = Inst.getOperand(0); + MachineOperand &Src = Inst.getOperand(1); const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64); const TargetRegisterClass *SrcRC = Src.isReg() ? @@ -2869,24 +2861,22 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist } void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist, - MachineInstr *Inst) const { - MachineBasicBlock &MBB = *Inst->getParent(); + MachineInstr &Inst) const { + MachineBasicBlock &MBB = *Inst.getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineBasicBlock::iterator MII = Inst; - DebugLoc DL = Inst->getDebugLoc(); + DebugLoc DL = Inst.getDebugLoc(); - MachineOperand &Dest = Inst->getOperand(0); - uint32_t Imm = Inst->getOperand(2).getImm(); + MachineOperand &Dest = Inst.getOperand(0); + uint32_t Imm = Inst.getOperand(2).getImm(); uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. (void) Offset; // Only sext_inreg cases handled. 
- assert(Inst->getOpcode() == AMDGPU::S_BFE_I64 && - BitWidth <= 32 && - Offset == 0 && - "Not implemented"); + assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 && + Offset == 0 && "Not implemented"); if (BitWidth < 32) { unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); @@ -2894,9 +2884,9 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist, unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo) - .addReg(Inst->getOperand(1).getReg(), 0, AMDGPU::sub0) - .addImm(0) - .addImm(BitWidth); + .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0) + .addImm(0) + .addImm(BitWidth); BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi) .addImm(31) @@ -2913,7 +2903,7 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist, return; } - MachineOperand &Src = Inst->getOperand(1); + MachineOperand &Src = Inst.getOperand(1); unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); @@ -2944,13 +2934,13 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist( } } -void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineInstr *SCCDefInst, - SmallVectorImpl<MachineInstr *> &Worklist) const { +void SIInstrInfo::addSCCDefUsersToVALUWorklist( + MachineInstr &SCCDefInst, SmallVectorImpl<MachineInstr *> &Worklist) const { // This assumes that all the users of SCC are in the same block // as the SCC def. for (MachineBasicBlock::iterator I = SCCDefInst, - E = SCCDefInst->getParent()->end(); I != E; ++I) { - + E = SCCDefInst.getParent()->end(); + I != E; ++I) { // Exit if we find another SCC def. if (I->findRegisterDefOperandIdx(AMDGPU::SCC) != -1) return; @@ -2985,9 +2975,9 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( } // Find the one SGPR operand we are allowed to use. -unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, +unsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const { - const MCInstrDesc &Desc = MI->getDesc(); + const MCInstrDesc &Desc = MI.getDesc(); // Find the one SGPR operand we are allowed to use. // @@ -2998,19 +2988,19 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, // // If the operand's class is an SGPR, we can never move it. 
- unsigned SGPRReg = findImplicitSGPRRead(*MI); + unsigned SGPRReg = findImplicitSGPRRead(MI); if (SGPRReg != AMDGPU::NoRegister) return SGPRReg; unsigned UsedSGPRs[3] = { AMDGPU::NoRegister }; - const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); for (unsigned i = 0; i < 3; ++i) { int Idx = OpIndices[i]; if (Idx == -1) break; - const MachineOperand &MO = MI->getOperand(Idx); + const MachineOperand &MO = MI.getOperand(Idx); if (!MO.isReg()) continue; @@ -3095,14 +3085,14 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const { return Rsrc23; } -bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr *MI) const { - unsigned Opc = MI->getOpcode(); +bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const { + unsigned Opc = MI.getOpcode(); return isSMRD(Opc); } -bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr *MI) const { - unsigned Opc = MI->getOpcode(); +bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const { + unsigned Opc = MI.getOpcode(); return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index dc8b328ee7c..1d2c0ba8f56 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -54,40 +54,39 @@ private: unsigned SubIdx, const TargetRegisterClass *SubRC) const; - void swapOperands(MachineBasicBlock::iterator Inst) const; + void swapOperands(MachineInstr &Inst) const; void lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist, - MachineInstr *Inst) const; + MachineInstr &Inst) const; void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist, - MachineInstr *Inst, unsigned Opcode) const; + MachineInstr &Inst, unsigned Opcode) const; void splitScalar64BitBinaryOp(SmallVectorImpl<MachineInstr *> &Worklist, - MachineInstr *Inst, unsigned Opcode) const; + MachineInstr &Inst, unsigned Opcode) const; void splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist, - MachineInstr *Inst) const; + MachineInstr &Inst) const; void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist, - MachineInstr *Inst) const; + MachineInstr &Inst) const; void addUsersToMoveToVALUWorklist( unsigned Reg, MachineRegisterInfo &MRI, SmallVectorImpl<MachineInstr *> &Worklist) const; - void addSCCDefUsersToVALUWorklist( - MachineInstr *SCCDefInst, SmallVectorImpl<MachineInstr *> &Worklist) const; + void + addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst, + SmallVectorImpl<MachineInstr *> &Worklist) const; const TargetRegisterClass * getDestEquivalentVGPRClass(const MachineInstr &Inst) const; - bool checkInstOffsetsDoNotOverlap(MachineInstr *MIa, - MachineInstr *MIb) const; + bool checkInstOffsetsDoNotOverlap(MachineInstr &MIa, MachineInstr &MIb) const; - unsigned findUsedSGPR(const MachineInstr *MI, int OpIndices[3]) const; + unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const; protected: - MachineInstr *commuteInstructionImpl(MachineInstr *MI, - bool NewMI, + MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override; @@ -98,45 +97,40 @@ public: return RI; } - bool isReallyTriviallyReMaterializable(const MachineInstr *MI, + bool isReallyTriviallyReMaterializable(const MachineInstr &MI, AliasAnalysis *AA) const override; bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override; - bool 
getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, + bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) const final; - bool shouldClusterMemOps(MachineInstr *FirstLdSt, - MachineInstr *SecondLdSt, + bool shouldClusterMemOps(MachineInstr &FirstLdSt, MachineInstr &SecondLdSt, unsigned NumLoads) const final; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; - unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - RegScavenger *RS, - unsigned TmpReg, - unsigned Offset, - unsigned Size) const; + unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, MachineInstr &MI, + RegScavenger *RS, unsigned TmpReg, + unsigned Offset, unsigned Size) const; void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIndex, + MachineBasicBlock::iterator MI, unsigned SrcReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, + MachineBasicBlock::iterator MI, unsigned DestReg, + int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; - bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; + bool expandPostRAPseudo(MachineInstr &MI) const override; // \brief Returns an opcode that can be used to move a value to a \p DstRC // register. If there is no hardware instruction that can store to \p @@ -146,8 +140,7 @@ public: LLVM_READONLY int commuteOpcode(const MachineInstr &MI) const; - bool findCommutedOpIndices(MachineInstr *MI, - unsigned &SrcOpIdx1, + bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, @@ -164,20 +157,20 @@ public: bool ReverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const override; - bool areMemAccessesTriviallyDisjoint( - MachineInstr *MIa, MachineInstr *MIb, - AliasAnalysis *AA = nullptr) const override; + bool + areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, + AliasAnalysis *AA = nullptr) const override; - bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, - unsigned Reg, MachineRegisterInfo *MRI) const final; + bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, + MachineRegisterInfo *MRI) const final; unsigned getMachineCSELookAheadLimit() const override { return 500; } MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB, - MachineBasicBlock::iterator &MI, + MachineInstr &MI, LiveVariables *LV) const override; - bool isSchedulingBoundary(const MachineInstr *MI, + bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override; @@ -361,7 +354,7 @@ public: bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const; bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const; - bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo, + bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const; /// \brief Return true if this 64-bit VALU instruction has a 32-bit encoding. 
@@ -380,7 +373,7 @@ public: bool hasModifiersSet(const MachineInstr &MI, unsigned OpName) const; - bool verifyInstruction(const MachineInstr *MI, + bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; static unsigned getVALUOp(const MachineInstr &MI); @@ -428,11 +421,11 @@ public: /// /// If the operand being legalized is a register, then a COPY will be used /// instead of MOV. - void legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const; + void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const; /// \brief Check if \p MO is a legal operand if it was the \p OpIdx Operand /// for \p MI. - bool isOperandLegal(const MachineInstr *MI, unsigned OpIdx, + bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO = nullptr) const; /// \brief Check if \p MO would be a valid operand for the given operand @@ -450,23 +443,23 @@ public: /// \brief Legalize operands in \p MI by either commuting it or inserting a /// copy of src1. - void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr *MI) const; + void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const; /// \brief Fix operands in \p MI to satisfy constant bus requirements. - void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr *MI) const; + void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const; /// Copy a value from a VGPR (\p SrcReg) to SGPR. This function can only /// be used when it is know that the value in SrcReg is same across all /// threads in the wave. /// \returns The SGPR register that \p SrcReg was copied to. - unsigned readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr *UseMI, - MachineRegisterInfo &MRI) const; + unsigned readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, + MachineRegisterInfo &MRI) const; - void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr *MI) const; + void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const; /// \brief Legalize all operands in this instruction. This function may /// create new instruction and insert them before \p MI. - void legalizeOperands(MachineInstr *MI) const; + void legalizeOperands(MachineInstr &MI) const; /// \brief Replace this instruction's opcode with the equivalent VALU /// opcode. This function will also move the users of \p MI to the @@ -505,8 +498,8 @@ public: uint64_t getDefaultRsrcDataFormat() const; uint64_t getScratchRsrcWords23() const; - bool isLowLatencyInstruction(const MachineInstr *MI) const; - bool isHighLatencyInstruction(const MachineInstr *MI) const; + bool isLowLatencyInstruction(const MachineInstr &MI) const; + bool isHighLatencyInstruction(const MachineInstr &MI) const; /// \brief Return the descriptor of the target-specific machine instruction /// that corresponds to the specified pseudo or native opcode. 
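The hunks below update the callers of these SIInstrInfo entry points (SIMachineScheduler, SIRegisterInfo, SIShrinkInstructions) in the same mechanical way: wherever a pass previously handed over a MachineInstr pointer, or let a MachineBasicBlock::iterator decay into one, it now dereferences once at the call site. A minimal caller-side sketch, not part of the patch: runOnBlock is a hypothetical helper, and only the SIInstrInfo signatures it calls (verifyInstruction, isVALU, legalizeOperands) are taken from the header diff above.

#include "SIInstrInfo.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Hypothetical caller; only the SIInstrInfo calls mirror the new
// by-reference signatures introduced in this patch.
static bool runOnBlock(MachineBasicBlock &MBB, const SIInstrInfo *TII) {
  bool Changed = false;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++; // Dereference the iterator once, up front.
    StringRef Err;
    if (!TII->verifyInstruction(MI, Err)) // was verifyInstruction(&MI, Err)
      continue;
    if (TII->isVALU(MI)) {
      TII->legalizeOperands(MI);          // was legalizeOperands(&MI)
      Changed = true;
    }
  }
  return Changed;
}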
diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp index 147eb99dc2a..cff9477ae95 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -1694,7 +1694,7 @@ void SIScheduleDAGMI::moveLowLatencies() { for (SDep& PredDep : SU->Preds) { SUnit *Pred = PredDep.getSUnit(); - if (SITII->isLowLatencyInstruction(Pred->getInstr())) { + if (SITII->isLowLatencyInstruction(*Pred->getInstr())) { IsLowLatencyUser = true; } if (Pred->NodeNum >= DAGSize) @@ -1704,7 +1704,7 @@ void SIScheduleDAGMI::moveLowLatencies() { MinPos = PredPos + 1; } - if (SITII->isLowLatencyInstruction(SU->getInstr())) { + if (SITII->isLowLatencyInstruction(*SU->getInstr())) { unsigned BestPos = LastLowLatencyUser + 1; if ((int)BestPos <= LastLowLatencyPos) BestPos = LastLowLatencyPos + 1; @@ -1729,7 +1729,7 @@ void SIScheduleDAGMI::moveLowLatencies() { bool CopyForLowLat = false; for (SDep& SuccDep : SU->Succs) { SUnit *Succ = SuccDep.getSUnit(); - if (SITII->isLowLatencyInstruction(Succ->getInstr())) { + if (SITII->isLowLatencyInstruction(*Succ->getInstr())) { CopyForLowLat = true; } } @@ -1814,12 +1814,12 @@ void SIScheduleDAGMI::schedule() SUnit *SU = &SUnits[i]; unsigned BaseLatReg; int64_t OffLatReg; - if (SITII->isLowLatencyInstruction(SU->getInstr())) { + if (SITII->isLowLatencyInstruction(*SU->getInstr())) { IsLowLatencySU[i] = 1; - if (SITII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseLatReg, - OffLatReg, TRI)) + if (SITII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseLatReg, OffLatReg, + TRI)) LowLatencyOffset[i] = OffLatReg; - } else if (SITII->isHighLatencyInstruction(SU->getInstr())) + } else if (SITII->isHighLatencyInstruction(*SU->getInstr())) IsHighLatencySU[i] = 1; } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 5a934ecec7a..748209bd065 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -651,7 +651,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, default: { int64_t Offset = FrameInfo->getObjectOffset(Index); FIOp.ChangeToImmediate(Offset); - if (!TII->isImmOperandLegal(MI, FIOperandNum, FIOp)) { + if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) { unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 517533ee734..6cba55300a8 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -172,7 +172,7 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, } // We have failed to fold src0, so commute the instruction and try again. - if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(&MI)) + if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(MI)) foldImmediates(MI, TII, MRI, false); } @@ -312,7 +312,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (!canShrink(MI, TII, TRI, MRI)) { // Try commuting the instruction and see if that enables us to shrink // it. - if (!MI.isCommutable() || !TII->commuteInstruction(&MI) || + if (!MI.isCommutable() || !TII->commuteInstruction(MI) || !canShrink(MI, TII, TRI, MRI)) continue; } |
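One more aside, on the S_BFE handling earlier in the SIInstrInfo.cpp changes (moveToVALU and splitScalar64BitBFE): the scalar BFE opcodes keep the bit offset and the field width packed into a single immediate, and both hunks unpack it with the same masks, the offset from bits [5:0] and the width from bits [22:16]. A self-contained sketch of that unpacking, with a made-up immediate value purely for illustration:

#include <cassert>
#include <cstdint>

// Decode an S_BFE-style packed immediate the way the hunks above do:
// bits [5:0] carry the bit offset, bits [22:16] carry the field width.
static void unpackBFEImm(uint32_t Imm, uint32_t &Offset, uint32_t &Width) {
  Offset = Imm & 0x3f;
  Width = (Imm & 0x7f0000) >> 16;
}

int main() {
  uint32_t Offset = 0, Width = 0;
  unpackBFEImm(0x00100008, Offset, Width); // made-up value: offset 8, width 16
  assert(Offset == 8 && Width == 16);

  // moveToVALU splits such an immediate back into two separate operands
  // (offset, then width) when rewriting S_BFE_I32/S_BFE_U32 to the VALU form.
  return 0;
}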