Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp       |  81
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp  | 109
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp             | 101
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInsertSkips.cpp              |  49
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInsertWaits.cpp              |  45
-rw-r--r--  llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp       |  41
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h        |  20
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineScheduler.h           |  22
8 files changed, 270 insertions, 198 deletions
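
This is mechanical clang-tidy modernize / include-what-you-use cleanup: each file's include list is trimmed and completed, constructor init-lists are replaced by C++11 in-class default member initializers with `= default` constructors, anonymous namespaces are tightened around the definitions they hide, namespace-closing braces gain `// end ... namespace` comments, and redundant braces around case labels are dropped. A minimal sketch of the central before/after pattern follows (illustrative only; ExamplePass and its members are invented, not taken from the patch):

    // Before: every field repeated in the constructor init-list, e.g.
    //   ExamplePass() : MachineFunctionPass(ID), TII(nullptr), Threshold(0) {}
    // After: defaults live next to the declarations, so every constructor,
    // present and future, picks them up automatically.
    namespace {

    class ExamplePass {
    public:
      ExamplePass() = default;   // nothing left to initialize by hand

    private:
      const void *TII = nullptr; // in-class default member initializer (NSDMI)
      unsigned Threshold = 0;    // default stated once, at the declaration
    };

    } // end anonymous namespace

The diff below applies exactly this family of transformations to the eight files listed above.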
diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 7faeccdc5df..addcf4e5013 100644
--- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -9,27 +9,39 @@
 //==-----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
-#include "AMDGPUInstrInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "R600InstrInfo.h"
+#include "R600RegisterInfo.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Dominators.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstddef>
 #include <deque>
+#include <iterator>
+#include <map>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -53,15 +65,19 @@ STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
 STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
 
 namespace llvm {
+
 void initializeAMDGPUCFGStructurizerPass(PassRegistry&);
-}
+
+} // end namespace llvm
+
+namespace {
 
 //===----------------------------------------------------------------------===//
 //
 // Miscellaneous utility for CFGStructurizer.
 //
 //===----------------------------------------------------------------------===//
 
-namespace {
+
 #define SHOWNEWINSTR(i) \
   DEBUG(dbgs() << "New instr: " << *i << "\n");
 
@@ -92,25 +108,19 @@ void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
   }
 }
 
-} // end anonymous namespace
-
 //===----------------------------------------------------------------------===//
 //
 // supporting data structure for CFGStructurizer
 //
 //===----------------------------------------------------------------------===//
-
-namespace {
-
 class BlockInformation {
 public:
-  bool IsRetired;
-  int  SccNum;
-  BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
-};
+  bool IsRetired = false;
+  int SccNum = INVALIDSCCNUM;
 
-} // end anonymous namespace
+  BlockInformation() = default;
+};
 
 //===----------------------------------------------------------------------===//
 //
@@ -118,7 +128,6 @@ public:
 //
 //===----------------------------------------------------------------------===//
 
-namespace {
 class AMDGPUCFGStructurizer : public MachineFunctionPass {
 public:
   typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
@@ -133,8 +142,7 @@ public:
 
   static char ID;
 
-  AMDGPUCFGStructurizer() :
-      MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {
+  AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
     initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
   }
 
@@ -167,7 +175,7 @@ public:
     MLI = &getAnalysis<MachineLoopInfo>();
     DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
     MDT = &getAnalysis<MachineDominatorTree>();
-    DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr););
+    DEBUG(MDT->print(dbgs(), (const Module*)nullptr););
     PDT = &getAnalysis<MachinePostDominatorTree>();
     DEBUG(PDT->print(dbgs()););
     prepare();
@@ -180,8 +188,8 @@ protected:
   MachineDominatorTree *MDT;
   MachinePostDominatorTree *PDT;
   MachineLoopInfo *MLI;
-  const R600InstrInfo *TII;
-  const R600RegisterInfo *TRI;
+  const R600InstrInfo *TII = nullptr;
+  const R600RegisterInfo *TRI = nullptr;
 
   // PRINT FUNCTIONS
   /// Print the ordered Blocks.
@@ -198,6 +206,7 @@ protected:
       }
     }
   }
+
   static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
     for (MachineLoop::iterator iter = LoopInfo.begin(),
          iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
@@ -263,7 +272,6 @@ protected:
       MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
   static void wrapup(MachineBasicBlock *MBB);
 
-
   int patternMatch(MachineBasicBlock *MBB);
   int patternMatchGroup(MachineBasicBlock *MBB);
   int serialPatternMatch(MachineBasicBlock *MBB);
@@ -328,7 +336,6 @@ protected:
   void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
   void retireBlock(MachineBasicBlock *MBB);
 
-
 private:
   MBBInfoMap BlockInfoMap;
   LoopLandInfoMap LLInfoMap;
@@ -337,6 +344,10 @@ private:
   SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
 };
 
+char AMDGPUCFGStructurizer::ID = 0;
+
+} // end anonymous namespace
+
 int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
   MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
   if (It == BlockInfoMap.end())
@@ -379,6 +390,7 @@ bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
   }
   return false;
 }
+
 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
     MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
     bool AllowSideEntry) const {
@@ -697,10 +709,8 @@ void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
    // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
    // there isn't such an interface yet.  alternatively, replace all the other
    // blocks in the jump table with the entryBlk //}
-
 }
 
-
 bool AMDGPUCFGStructurizer::prepare() {
   bool Changed = false;
 
@@ -748,7 +758,6 @@ bool AMDGPUCFGStructurizer::prepare() {
 }
 
 bool AMDGPUCFGStructurizer::run() {
-
   //Assume reducible CFG...
   DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
 
@@ -886,8 +895,6 @@ bool AMDGPUCFGStructurizer::run() {
   return true;
 }
 
-
-
 void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
   int SccNum = 0;
   MachineBasicBlock *MBB;
@@ -941,7 +948,6 @@ int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
   return NumMatch;
 }
 
-
 int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
   if (MBB->succ_size() != 1)
     return 0;
@@ -1039,7 +1045,7 @@ int AMDGPUCFGStructurizer::loopendPatternMatch() {
     for (MachineLoop *ML : depth_first(It))
       NestedLoops.push_front(ML);
 
-  if (NestedLoops.size() == 0)
+  if (NestedLoops.empty())
     return 0;
 
   // Process nested loop outside->inside (we did push_front),
@@ -1074,7 +1080,7 @@ int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
   MachineBasicBlock *ExitBlk = *ExitBlks.begin();
   assert(ExitBlk && "Loop has several exit block");
   MBBVector LatchBlks;
-  typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
+  typedef GraphTraits<Inverse<MachineBasicBlock*>> InvMBBTraits;
   InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
       PE = InvMBBTraits::child_end(LoopHeader);
   for (; PI != PE; PI++) {
@@ -1217,7 +1223,7 @@ void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
     }
   }
 
-    dbgs() << "\n";
+  dbgs() << "\n";
 }
 
 int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
@@ -1478,7 +1484,6 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
 
   if (LandMBB && TrueMBB && FalseMBB)
     MBB->addSuccessor(LandMBB);
-
 }
 
 void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
@@ -1491,7 +1496,6 @@ void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
   DstBlk->replaceSuccessor(DstBlk, LandMBB);
 }
 
-
 void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
     MachineBasicBlock *LandMBB) {
   DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
@@ -1727,11 +1731,6 @@ void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
          && "can't retire block yet");
 }
 
-char AMDGPUCFGStructurizer::ID = 0;
-
-} // end anonymous namespace
-
-
 INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
                       "AMDGPU CFG Structurizer", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 425fd529e1b..fed2e93073b 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -16,6 +16,7 @@
 #include "Utils/AMDGPUAsmUtils.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/STLExtras.h"
@@ -39,15 +40,12 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/MathExtras.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -56,7 +54,6 @@
 #include <map>
 #include <memory>
 #include <string>
-#include <vector>
 
 using namespace llvm;
 using namespace llvm::AMDGPU;
@@ -695,9 +692,9 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
 // .amdgpu_hsa_kernel or at EOF.
 class KernelScopeInfo {
-  int SgprIndexUnusedMin;
-  int VgprIndexUnusedMin;
-  MCContext *Ctx;
+  int SgprIndexUnusedMin = -1;
+  int VgprIndexUnusedMin = -1;
+  MCContext *Ctx = nullptr;
 
   void usesSgprAt(int i) {
     if (i >= SgprIndexUnusedMin) {
@@ -708,6 +705,7 @@ class KernelScopeInfo {
       }
     }
   }
+
   void usesVgprAt(int i) {
     if (i >= VgprIndexUnusedMin) {
       VgprIndexUnusedMin = ++i;
@@ -717,14 +715,16 @@ class KernelScopeInfo {
       }
     }
   }
+
 public:
-  KernelScopeInfo() : SgprIndexUnusedMin(-1), VgprIndexUnusedMin(-1), Ctx(nullptr)
-  {}
+  KernelScopeInfo() = default;
+
   void initialize(MCContext &Context) {
     Ctx = &Context;
     usesSgprAt(SgprIndexUnusedMin = -1);
     usesVgprAt(VgprIndexUnusedMin = -1);
   }
+
   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
     switch (RegKind) {
       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
@@ -738,9 +738,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   const MCInstrInfo &MII;
   MCAsmParser &Parser;
 
-  unsigned ForcedEncodingSize;
-  bool ForcedDPP;
-  bool ForcedSDWA;
+  unsigned ForcedEncodingSize = 0;
+  bool ForcedDPP = false;
+  bool ForcedSDWA = false;
   KernelScopeInfo KernelScope;
 
   /// @name Auto-generated Match Functions
@@ -779,10 +779,7 @@ public:
   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                const MCInstrInfo &MII,
                const MCTargetOptions &Options)
-      : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser),
-        ForcedEncodingSize(0),
-        ForcedDPP(false),
-        ForcedSDWA(false) {
+      : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser) {
     MCAsmParserExtension::Initialize(Parser);
 
     if (getSTI().getFeatureBits().none()) {
@@ -1043,7 +1040,6 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
       AsmParser->hasInv2PiInlineImm());
   }
 
-
   // We got int literal token.
   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
     return AMDGPU::isInlinableLiteral64(Imm.Val,
@@ -1132,7 +1128,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
     APInt Literal(64, Val);
 
     switch (OpSize) {
-    case 8: {
+    case 8:
       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                        AsmParser->hasInv2PiInlineImm())) {
         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
@@ -1156,7 +1152,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
       // unclear how we should encode them. This case should be checked earlier
       // in predicate methods (isLiteralImm())
       llvm_unreachable("fp literal in 64-bit integer instruction.");
-    }
+
     case 4:
     case 2: {
       bool lost;
@@ -1180,7 +1176,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
   // Only sign extend inline immediates.
   // FIXME: No errors on truncation
   switch (OpSize) {
-  case 4: {
+  case 4:
     if (isInt<32>(Val) &&
         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                      AsmParser->hasInv2PiInlineImm())) {
@@ -1190,8 +1186,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
 
     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
     return;
-  }
-  case 8: {
+
+  case 8:
     if (AMDGPU::isInlinableLiteral64(Val,
                                      AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Val));
@@ -1200,8 +1196,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
 
     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
     return;
-  }
-  case 2: {
+
+  case 2:
     if (isInt<16>(Val) &&
         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                      AsmParser->hasInv2PiInlineImm())) {
@@ -1211,7 +1207,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
 
     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
     return;
-  }
+
   default:
     llvm_unreachable("invalid operand size");
   }
@@ -1295,7 +1291,8 @@ static unsigned getSpecialRegForName(StringRef RegName) {
     .Default(0);
 }
 
-bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                    SMLoc &EndLoc) {
   auto R = parseRegister();
   if (!R) return true;
   assert(R->isReg());
@@ -1305,20 +1302,43 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &End
   return false;
 }
 
-bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum)
-{
+bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
+                                            RegisterKind RegKind, unsigned Reg1,
+                                            unsigned RegNum) {
   switch (RegKind) {
   case IS_SPECIAL:
-    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { Reg = AMDGPU::EXEC; RegWidth = 2; return true; }
-    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { Reg = AMDGPU::FLAT_SCR; RegWidth = 2; return true; }
-    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { Reg = AMDGPU::VCC; RegWidth = 2; return true; }
-    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { Reg = AMDGPU::TBA; RegWidth = 2; return true; }
-    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { Reg = AMDGPU::TMA; RegWidth = 2; return true; }
+    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
+      Reg = AMDGPU::EXEC;
+      RegWidth = 2;
+      return true;
+    }
+    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
+      Reg = AMDGPU::FLAT_SCR;
+      RegWidth = 2;
+      return true;
+    }
+    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
+      Reg = AMDGPU::VCC;
+      RegWidth = 2;
+      return true;
+    }
+    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
+      Reg = AMDGPU::TBA;
+      RegWidth = 2;
+      return true;
+    }
+    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
+      Reg = AMDGPU::TMA;
+      RegWidth = 2;
+      return true;
+    }
     return false;
   case IS_VGPR:
   case IS_SGPR:
   case IS_TTMP:
-    if (Reg1 != Reg + RegWidth) { return false; }
+    if (Reg1 != Reg + RegWidth) {
+      return false;
+    }
     RegWidth++;
     return true;
   default:
@@ -1326,8 +1346,9 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, R
   }
 }
 
-bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex)
-{
+bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
+                                          unsigned &RegNum, unsigned &RegWidth,
+                                          unsigned *DwordRegIndex) {
   if (DwordRegIndex) { *DwordRegIndex = 0; }
   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
   if (getLexer().is(AsmToken::Identifier)) {
@@ -1528,7 +1549,8 @@ AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) {
 }
 
 OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm) {
+AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
+                                              bool AllowImm) {
   // XXX: During parsing we can't determine if minus sign means
   // negate-modifier or negative immediate value.
   // By default we suppose it is modifier.
@@ -1539,7 +1561,8 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
     Negate = true;
   }
 
-  if (getLexer().getKind() == AsmToken::Identifier && Parser.getTok().getString() == "abs") {
+  if (getLexer().getKind() == AsmToken::Identifier &&
+      Parser.getTok().getString() == "abs") {
     Parser.Lex();
     Abs2 = true;
     if (getLexer().isNot(AsmToken::LParen)) {
@@ -1597,10 +1620,12 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
 }
 
 OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm) {
+AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
+                                               bool AllowImm) {
   bool Sext = false;
 
-  if (getLexer().getKind() == AsmToken::Identifier && Parser.getTok().getString() == "sext") {
+  if (getLexer().getKind() == AsmToken::Identifier &&
+      Parser.getTok().getString() == "sext") {
     Parser.Lex();
     Sext = true;
     if (getLexer().isNot(AsmToken::LParen)) {
@@ -1667,7 +1692,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands)
 }
 
 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
-
   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
 
   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
@@ -1799,7 +1823,6 @@ bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
   return false;
 }
 
-
 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                                uint32_t &Minor) {
   if (ParseAsAbsoluteExpression(Major))
@@ -1816,7 +1839,6 @@ bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
 }
 
 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
-
   uint32_t Major;
   uint32_t Minor;
 
@@ -2086,7 +2108,6 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
 
 OperandMatchResultTy
 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
-
   // Try to parse with a custom parser
   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e3143451b00..4dde35a7b2d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15,7 +15,6 @@
 #ifdef _MSC_VER
 // Provide M_PI.
 #define _USE_MATH_DEFINES
-#include <cmath>
 #endif
 
 #include "AMDGPU.h"
@@ -26,15 +25,59 @@
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/DAGCombine.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetCallingConv.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -43,7 +86,6 @@ static cl::opt<bool> EnableVGPRIndexMode(
   cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
   cl::init(false));
 
-
 static unsigned findFirstFreeSGPR(CCState &CCInfo) {
   unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
   for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
@@ -110,7 +152,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
   setTruncStoreAction(MVT::v32i32, MVT::v32i8, Expand);
 
-
   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
   setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand);
@@ -441,7 +482,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
     return false;
 
   switch (AS) {
-  case AMDGPUAS::GLOBAL_ADDRESS: {
+  case AMDGPUAS::GLOBAL_ADDRESS:
     if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
       // Assume the we will use FLAT for all global memory accesses
       // on VI.
@@ -456,8 +497,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
     }
 
     return isLegalMUBUFAddressingMode(AM);
-  }
-  case AMDGPUAS::CONSTANT_ADDRESS: {
+
+  case AMDGPUAS::CONSTANT_ADDRESS:
     // If the offset isn't a multiple of 4, it probably isn't going to be
     // correctly aligned.
     // FIXME: Can we get the real alignment here?
@@ -494,13 +535,12 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
       return true;
 
     return false;
-  }
 
   case AMDGPUAS::PRIVATE_ADDRESS:
     return isLegalMUBUFAddressingMode(AM);
 
   case AMDGPUAS::LOCAL_ADDRESS:
-  case AMDGPUAS::REGION_ADDRESS: {
+  case AMDGPUAS::REGION_ADDRESS:
     // Basic, single offset DS instructions allow a 16-bit unsigned immediate
     // field.
     // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
@@ -515,7 +555,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
       return true;
 
     return false;
-  }
+
   case AMDGPUAS::FLAT_ADDRESS:
   case AMDGPUAS::UNKNOWN_ADDRESS_SPACE:
     // For an unknown address space, this usually means that this is for some
@@ -897,7 +937,6 @@ SDValue SITargetLowering::LowerFormalArguments(
   SmallVector<SDValue, 16> Chains;
 
   for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
-
     const ISD::InputArg &Arg = Ins[i];
     if (Skipped[i]) {
       InVals.push_back(DAG.getUNDEF(Arg.VT));
@@ -954,7 +993,6 @@ SDValue SITargetLowering::LowerFormalArguments(
     SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
 
     if (Arg.VT.isVector()) {
-
       // Build a vector from the registers
       Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
       unsigned NumElements = ParamType->getVectorNumElements();
@@ -1543,7 +1581,6 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
     return &MBB;
   }
 
-
   const DebugLoc &DL = MI.getDebugLoc();
   MachineBasicBlock::iterator I(&MI);
 
@@ -1736,13 +1773,13 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
   }
 
   switch (MI.getOpcode()) {
-  case AMDGPU::SI_INIT_M0: {
+  case AMDGPU::SI_INIT_M0:
     BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
             TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
         .add(MI.getOperand(0));
     MI.eraseFromParent();
     return BB;
-  }
+
   case AMDGPU::GET_GROUPSTATICSIZE: {
     DebugLoc DL = MI.getDebugLoc();
     BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
@@ -2001,7 +2038,6 @@ bool SITargetLowering::shouldEmitPCReloc(const GlobalValue *GV) const {
 /// last parameter, also switches branch target with BR if the need arise
 SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
                                       SelectionDAG &DAG) const {
-
   SDLoc DL(BRCOND);
 
   SDNode *Intr = BRCOND.getOperand(1).getNode();
@@ -2399,17 +2435,15 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::amdgcn_rsq:
   case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
     return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
-  case Intrinsic::amdgcn_rsq_legacy: {
+  case Intrinsic::amdgcn_rsq_legacy:
    if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
      return emitRemovedIntrinsicError(DAG, DL, VT);
 
    return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
-  }
-  case Intrinsic::amdgcn_rcp_legacy: {
+  case Intrinsic::amdgcn_rcp_legacy:
    if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
      return emitRemovedIntrinsicError(DAG, DL, VT);
    return DAG.getNode(AMDGPUISD::RCP_LEGACY, DL, VT, Op.getOperand(1));
-  }
   case Intrinsic::amdgcn_rsq_clamp: {
     if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
       return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
@@ -2516,9 +2550,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
                                    Op->getVTList(), Ops, VT, MMO);
   }
-  case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
+  case AMDGPUIntrinsic::amdgcn_fdiv_fast:
     return lowerFDIV_FAST(Op, DAG);
-  }
   case AMDGPUIntrinsic::SI_vs_load_input:
     return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
                        Op.getOperand(1),
@@ -2912,7 +2945,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     // loads.
     //
     LLVM_FALLTHROUGH;
-  case AMDGPUAS::GLOBAL_ADDRESS: {
+  case AMDGPUAS::GLOBAL_ADDRESS:
     if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
                   isMemOpHasNoClobberedMemOperand(Load))
       return SDValue();
@@ -2920,14 +2953,13 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     // have the same legalization requirements as global and private
     // loads.
     //
-  }
     LLVM_FALLTHROUGH;
   case AMDGPUAS::FLAT_ADDRESS:
     if (NumElements > 4)
       return SplitVectorLoad(Op, DAG);
     // v4 loads are supported for private and global memory.
     return SDValue();
-  case AMDGPUAS::PRIVATE_ADDRESS: {
+  case AMDGPUAS::PRIVATE_ADDRESS:
     // Depending on the setting of the private_element_size field in the
     // resource descriptor, we can only make private accesses up to a certain
     // size.
@@ -2946,8 +2978,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     default:
       llvm_unreachable("unsupported private_element_size");
     }
-  }
-  case AMDGPUAS::LOCAL_ADDRESS: {
+  case AMDGPUAS::LOCAL_ADDRESS:
     if (NumElements > 2)
       return SplitVectorLoad(Op, DAG);
 
@@ -2956,7 +2987,6 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
 
     // If properly aligned, if we split we might be able to use ds_read_b64.
     return SplitVectorLoad(Op, DAG);
-  }
   default:
     return SDValue();
   }
@@ -3454,27 +3484,24 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
 static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
                           const SISubtarget &STI) {
   switch (AS) {
-  case AMDGPUAS::GLOBAL_ADDRESS: {
+  case AMDGPUAS::GLOBAL_ADDRESS:
     // MUBUF instructions a 12-bit offset in bytes.
     return isUInt<12>(OffsetSize);
-  }
-  case AMDGPUAS::CONSTANT_ADDRESS: {
+  case AMDGPUAS::CONSTANT_ADDRESS:
     // SMRD instructions have an 8-bit offset in dwords on SI and
     // a 20-bit offset in bytes on VI.
     if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
       return isUInt<20>(OffsetSize);
    else
      return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
-  }
   case AMDGPUAS::LOCAL_ADDRESS:
-  case AMDGPUAS::REGION_ADDRESS: {
+  case AMDGPUAS::REGION_ADDRESS:
     // The single offset versions have a 16-bit offset in bytes.
     return isUInt<16>(OffsetSize);
-  }
   case AMDGPUAS::PRIVATE_ADDRESS:
   // Indirect register addressing does not use any offsets.
   default:
-    return 0;
+    return false;
   }
 }
 
@@ -4176,11 +4203,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::ATOMIC_LOAD_UMIN:
   case ISD::ATOMIC_LOAD_UMAX:
   case AMDGPUISD::ATOMIC_INC:
-  case AMDGPUISD::ATOMIC_DEC: { // TODO: Target mem intrinsics.
+  case AMDGPUISD::ATOMIC_DEC: // TODO: Target mem intrinsics.
     if (DCI.isBeforeLegalize())
       break;
     return performMemSDNodeCombine(cast<MemSDNode>(N), DCI);
-  }
   case ISD::AND:
     return performAndCombine(N, DCI);
   case ISD::OR:
@@ -4291,7 +4317,6 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
 
   // Update the users of the node with the new indices
   for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
-
     SDNode *User = Users[i];
     if (!User)
       continue;
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index fe1464726af..c6b420fce8a 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -1,4 +1,4 @@
-//===-- SIInsertSkips.cpp - Use predicates for control flow ----------===//
+//===-- SIInsertSkips.cpp - Use predicates for control flow ---------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,33 +12,46 @@
 /// branches when it's expected that jumping over the untaken control flow will
 /// be cheaper than having every workitem no-op through it.
 //
+//===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugLoc.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "si-insert-skips"
 
-namespace {
-
 static cl::opt<unsigned> SkipThresholdFlag(
   "amdgpu-skip-threshold",
   cl::desc("Number of instructions before jumping over divergent control flow"),
   cl::init(12), cl::Hidden);
 
+namespace {
+
 class SIInsertSkips : public MachineFunctionPass {
 private:
-  const SIRegisterInfo *TRI;
-  const SIInstrInfo *TII;
-  unsigned SkipThreshold;
+  const SIRegisterInfo *TRI = nullptr;
+  const SIInstrInfo *TII = nullptr;
+  unsigned SkipThreshold = 0;
 
   bool shouldSkip(const MachineBasicBlock &From,
                   const MachineBasicBlock &To) const;
@@ -55,8 +68,7 @@ private:
 public:
   static char ID;
 
-  SIInsertSkips() :
-    MachineFunctionPass(ID), TRI(nullptr), TII(nullptr), SkipThreshold(0) { }
+  SIInsertSkips() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -69,7 +81,7 @@ public:
   }
 };
 
-} // End anonymous namespace
+} // end anonymous namespace
 
 char SIInsertSkips::ID = 0;
 
@@ -270,19 +282,19 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
       MachineInstr &MI = *I;
 
       switch (MI.getOpcode()) {
-      case AMDGPU::SI_MASK_BRANCH: {
+      case AMDGPU::SI_MASK_BRANCH:
        ExecBranchStack.push_back(MI.getOperand(0).getMBB());
        MadeChange |= skipMaskBranch(MI, MBB);
        break;
-      }
-      case AMDGPU::S_BRANCH: {
+
+      case AMDGPU::S_BRANCH:
        // Optimize out branches to the next block.
        // FIXME: Shouldn't this be handled by BranchFolding?
        if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB()))
          MI.eraseFromParent();
        break;
-      }
-      case AMDGPU::SI_KILL_TERMINATOR: {
+
+      case AMDGPU::SI_KILL_TERMINATOR:
        MadeChange = true;
        kill(MI);
 
@@ -298,8 +310,8 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
 
        MI.eraseFromParent();
        break;
-      }
-      case AMDGPU::SI_RETURN: {
+
+      case AMDGPU::SI_RETURN:
        // FIXME: Should move somewhere else
        assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
 
@@ -318,7 +330,8 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
            .addMBB(EmptyMBBAtEnd);
          I->eraseFromParent();
        }
-      }
+       break;
+
       default:
        break;
       }
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index fceabd7a8fd..c814e55e844 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -21,11 +21,28 @@
 #include "SIDefines.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <new>
+#include <utility>
 
 #define DEBUG_TYPE "si-insert-waits"
 
@@ -42,7 +59,6 @@ typedef union {
     unsigned LGKM;
   } Named;
   unsigned Array[3];
-
 } Counters;
 
 typedef enum {
@@ -55,11 +71,10 @@ typedef Counters RegCounters[512];
 typedef std::pair<unsigned, unsigned> RegInterval;
 
 class SIInsertWaits : public MachineFunctionPass {
-
 private:
-  const SISubtarget *ST;
-  const SIInstrInfo *TII;
-  const SIRegisterInfo *TRI;
+  const SISubtarget *ST = nullptr;
+  const SIInstrInfo *TII = nullptr;
+  const SIRegisterInfo *TRI = nullptr;
   const MachineRegisterInfo *MRI;
   IsaVersion IV;
 
@@ -86,7 +101,7 @@ private:
   RegCounters DefinedRegs;
 
   /// \brief Different export instruction types seen since last wait.
-  unsigned ExpInstrTypesSeen;
+  unsigned ExpInstrTypesSeen = 0;
 
   /// \brief Type of the last opcode.
   InstType LastOpcodeType;
@@ -100,7 +115,7 @@ private:
   bool ReturnsVoid;
 
   /// Whether the VCCZ bit is possibly corrupt
-  bool VCCZCorrupt;
+  bool VCCZCorrupt = false;
 
   /// \brief Get increment/decrement amount for this instruction.
   Counters getHwCounts(MachineInstr &MI);
@@ -141,13 +156,7 @@ private:
 public:
   static char ID;
 
-  SIInsertWaits() :
-    MachineFunctionPass(ID),
-    ST(nullptr),
-    TII(nullptr),
-    TRI(nullptr),
-    ExpInstrTypesSeen(0),
-    VCCZCorrupt(false) { }
+  SIInsertWaits() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -161,7 +170,7 @@ public:
   }
 };
 
-} // End anonymous namespace
+} // end anonymous namespace
 
 INITIALIZE_PASS_BEGIN(SIInsertWaits, DEBUG_TYPE,
                       "SI Insert Waits", false, false)
@@ -294,7 +303,6 @@ RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
 void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I,
                                     const Counters &Increment) {
-
   // Get the hardware counter increments and sum them up
   Counters Limit = ZeroCounts;
   unsigned Sum = 0;
@@ -366,7 +374,6 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
 bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I,
                                const Counters &Required) {
-
   // End of program? No need to wait on anything
   // A function not returning void needs to wait, because other bytecode will
   // be appended after it and we don't know what it will be.
@@ -393,7 +400,6 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
   bool NeedWait = false;
 
   for (unsigned i = 0; i < 3; ++i) {
-
     if (Required.Array[i] <= WaitedOn.Array[i])
       continue;
 
@@ -434,7 +440,6 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
 
 /// \brief helper function for handleOperands
 static void increaseCounters(Counters &Dst, const Counters &Src) {
-
   for (unsigned i = 0; i < 3; ++i)
     Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
 }
@@ -468,7 +473,6 @@ void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
 }
 
 Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
-
   Counters Result = ZeroCounts;
 
   // For each register affected by this instruction increase the result
@@ -484,7 +488,6 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
     const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
     RegInterval Interval = getRegInterval(RC, Op);
     for (unsigned j = Interval.first; j < Interval.second; ++j) {
-
       if (Op.isDef()) {
         increaseCounters(Result, UsedRegs[j]);
         increaseCounters(Result, DefinedRegs[j]);
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index ae5aefc2676..6b0d18efa81 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -39,15 +39,27 @@
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIRegisterInfo.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
 
 using namespace llvm;
 
@@ -57,10 +69,10 @@ namespace {
 
 class SILoadStoreOptimizer : public MachineFunctionPass {
 private:
-  const SIInstrInfo *TII;
-  const SIRegisterInfo *TRI;
-  MachineRegisterInfo *MRI;
-  AliasAnalysis *AA;
+  const SIInstrInfo *TII = nullptr;
+  const SIRegisterInfo *TRI = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  AliasAnalysis *AA = nullptr;
 
   static bool offsetsCanBeCombined(unsigned Offset0,
                                    unsigned Offset1,
@@ -86,9 +98,7 @@ private:
 public:
   static char ID;
 
-  SILoadStoreOptimizer()
-      : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), MRI(nullptr),
-        AA(nullptr) {}
+  SILoadStoreOptimizer() : MachineFunctionPass(ID) {}
 
   SILoadStoreOptimizer(const TargetMachine &TM_) : MachineFunctionPass(ID) {
     initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
@@ -108,7 +118,7 @@ public:
   }
 };
 
-} // End anonymous namespace.
+} // end anonymous namespace.
 
 INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
                       "SI Load / Store Optimizer", false, false)
@@ -141,11 +151,10 @@ static void addDefsToList(const MachineInstr &MI,
   }
 }
 
-static bool memAccessesCanBeReordered(
-  MachineBasicBlock::iterator A,
-  MachineBasicBlock::iterator B,
-  const SIInstrInfo *TII,
-  llvm::AliasAnalysis * AA) {
+static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
+                                      MachineBasicBlock::iterator B,
+                                      const SIInstrInfo *TII,
+                                      AliasAnalysis * AA) {
   return (TII->areMemAccessesTriviallyDisjoint(*A, *B, AA) ||
     // RAW or WAR - cannot reorder
     // WAW - cannot reorder
@@ -179,7 +188,6 @@ canMoveInstsAcrossMemOp(MachineInstr &MemOp,
                         ArrayRef<MachineInstr*> InstsToMove,
                         const SIInstrInfo *TII,
                         AliasAnalysis *AA) {
-
   assert(MemOp.mayLoadOrStore());
 
   for (MachineInstr *InstToMove : InstsToMove) {
@@ -230,7 +238,6 @@ SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
   addDefsToList(*I, DefsToMove);
 
   for ( ; MBBI != E; ++MBBI) {
-
     if (MBBI->getOpcode() != I->getOpcode()) {
 
       // This is not a matching DS instruction, but we can keep looking as
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 3b4e233cd78..2d92068e5c4 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -16,13 +16,16 @@
 
 #include "AMDGPUMachineFunction.h"
 #include "SIRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
 #include <array>
+#include <cassert>
 #include <map>
+#include <utility>
 
 namespace llvm {
 
-class MachineRegisterInfo;
-
 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
 public:
   explicit AMDGPUImagePseudoSourceValue() :
@@ -174,10 +177,12 @@ private:
 
 public:
   struct SpilledReg {
-    unsigned VGPR;
-    int Lane;
+    unsigned VGPR = AMDGPU::NoRegister;
+    int Lane = -1;
+
+    SpilledReg() = default;
     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
-    SpilledReg() : VGPR(AMDGPU::NoRegister), Lane(-1) { }
+
     bool hasLane() { return Lane != -1;}
     bool hasReg() { return VGPR != AMDGPU::NoRegister;}
   };
@@ -185,6 +190,7 @@ public:
 
   // SIMachineFunctionInfo definition
   SIMachineFunctionInfo(const MachineFunction &MF);
+
   SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
                            unsigned SubIdx);
   bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
@@ -495,6 +501,6 @@ public:
   }
 };
 
-} // End namespace llvm
+} // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
index 77c07350d32..2dc4b346de7 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -40,13 +40,12 @@ enum SIScheduleCandReason {
 
 struct SISchedulerCandidate {
   // The reason for this candidate.
-  SIScheduleCandReason Reason;
+  SIScheduleCandReason Reason = NoCand;
 
   // Set of reasons that apply to multiple candidates.
-  uint32_t RepeatReasonSet;
+  uint32_t RepeatReasonSet = 0;
 
-  SISchedulerCandidate()
-    :  Reason(NoCand), RepeatReasonSet(0) {}
+  SISchedulerCandidate() = default;
 
   bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
   void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
@@ -84,8 +83,8 @@ class SIScheduleBlock {
   std::set<unsigned> LiveInRegs;
   std::set<unsigned> LiveOutRegs;
 
-  bool Scheduled;
-  bool HighLatencyBlock;
+  bool Scheduled = false;
+  bool HighLatencyBlock = false;
 
   std::vector<unsigned> HasLowLatencyNonWaitedParent;
 
@@ -94,13 +93,12 @@ class SIScheduleBlock {
   std::vector<SIScheduleBlock*> Preds;  // All blocks predecessors.
   std::vector<SIScheduleBlock*> Succs;  // All blocks successors.
 
-  unsigned NumHighLatencySuccessors;
+  unsigned NumHighLatencySuccessors = 0;
 
 public:
   SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
                   unsigned ID):
-    DAG(DAG), BC(BC), TopRPTracker(TopPressure), Scheduled(false),
-    HighLatencyBlock(false), ID(ID), NumHighLatencySuccessors(0) {}
+    DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
 
   ~SIScheduleBlock() = default;
 
@@ -213,9 +211,9 @@ struct SIScheduleBlocks {
 };
 
 enum SISchedulerBlockCreatorVariant {
-    LatenciesAlone,
-    LatenciesGrouped,
-    LatenciesAlonePlusConsecutive
+  LatenciesAlone,
+  LatenciesGrouped,
+  LatenciesAlonePlusConsecutive
 };
 
 class SIScheduleBlockCreator {

