AMDGPU: Separate R600 and GCN TableGen files

Summary:
We now have two sets of generated TableGen files, one for R600 and one for GCN, so each sub-target now has its own tables of instructions, registers, ISel patterns, etc. This should help reduce compile time since each sub-target now only has to consider information that is specific to itself. This will also help prevent the R600 sub-target from slowing down new features for GCN, like disassembler support, GlobalISel, etc.

Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
author: Tom Stellard <tstellar@redhat.com> 2018-06-28 23:47:12 +0000
committer: Tom Stellard <tstellar@redhat.com> 2018-06-28 23:47:12 +0000
commit: c5a154db48c3cd9e16b5c74977d506415414daf7 (patch)
tree: 9f13c6c0c08d47bb47b4058de080226cfec8f739 /llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
parent: 3702f9128779ffdd0f3b2c9db88c21379aaf5171 (diff)
download: bcm5719-llvm-c5a154db48c3cd9e16b5c74977d506415414daf7.tar.gz
bcm5719-llvm-c5a154db48c3cd9e16b5c74977d506415414daf7.zip
1 files changed, 375 insertions, 238 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 333b99f9ced..9c8b82c2834 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -39,22 +39,181 @@
 
 #define GET_SUBTARGETINFO_HEADER
 #include "AMDGPUGenSubtargetInfo.inc"
+#define GET_SUBTARGETINFO_HEADER
+#include "R600GenSubtargetInfo.inc"
 
 namespace llvm {
 
 class StringRef;
 
-class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
+class AMDGPUCommonSubtarget {
+private:
+  Triple TargetTriple;
+
+protected:
+  const FeatureBitset &SubtargetFeatureBits;
+  bool Has16BitInsts;
+  bool HasMadMixInsts;
+  bool FP32Denormals;
+  bool FPExceptions;
+  bool HasSDWA;
+  bool HasVOP3PInsts;
+  bool HasMulI24;
+  bool HasMulU24;
+  bool HasFminFmaxLegacy;
+  bool EnablePromoteAlloca;
+  int LocalMemorySize;
+  unsigned WavefrontSize;
+
+public:
+  AMDGPUCommonSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
+
+  static const AMDGPUCommonSubtarget &get(const MachineFunction &MF);
+  static const AMDGPUCommonSubtarget &get(const TargetMachine &TM,
+                                          const Function &F);
+
+  /// \returns Default range flat work group size for a calling convention.
+  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
+
+  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
+  /// for function \p F, or minimum/maximum flat work group sizes explicitly
+  /// requested using "amdgpu-flat-work-group-size" attribute attached to
+  /// function \p F.
+  ///
+  /// \returns Subtarget's default values if explicitly requested values cannot
+  /// be converted to integer, or violate subtarget's specifications.
+  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
+
+  /// \returns Subtarget's default pair of minimum/maximum number of waves per
+  /// execution unit for function \p F, or minimum/maximum number of waves per
+  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
+  /// attached to function \p F.
+  ///
+  /// \returns Subtarget's default values if explicitly requested values cannot
+  /// be converted to integer, violate subtarget's specifications, or are not
+  /// compatible with minimum/maximum number of waves limited by flat work group
+  /// size, register usage, and/or lds usage.
+  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
+
+  /// Return the amount of LDS that can be used that will not restrict the
+  /// occupancy lower than WaveCount.
+  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
+                                           const Function &) const;
+
+  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount
+  /// if the given LDS memory size is the only constraint.
+  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
+
+  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
+
+  bool isAmdHsaOS() const {
+    return TargetTriple.getOS() == Triple::AMDHSA;
+  }
+
+  bool isAmdPalOS() const {
+    return TargetTriple.getOS() == Triple::AMDPAL;
+  }
+
+  bool has16BitInsts() const {
+    return Has16BitInsts;
+  }
+
+  bool hasMadMixInsts() const {
+    return HasMadMixInsts;
+  }
+
+  bool hasFP32Denormals() const {
+    return FP32Denormals;
+  }
+
+  bool hasFPExceptions() const {
+    return FPExceptions;
+  }
+
+  bool hasSDWA() const {
+    return HasSDWA;
+  }
+
+  bool hasVOP3PInsts() const {
+    return HasVOP3PInsts;
+  }
+
+  bool hasMulI24() const {
+    return HasMulI24;
+  }
+
+  bool hasMulU24() const {
+    return HasMulU24;
+  }
+
+  bool hasFminFmaxLegacy() const {
+    return HasFminFmaxLegacy;
+  }
+
+  bool isPromoteAllocaEnabled() const {
+    return EnablePromoteAlloca;
+  }
+
+  unsigned getWavefrontSize() const {
+    return WavefrontSize;
+  }
+
+  int getLocalMemorySize() const {
+    return LocalMemorySize;
+  }
+
+  unsigned getAlignmentForImplicitArgPtr() const {
+    return isAmdHsaOS() ? 8 : 4;
+  }
+
+  /// \returns Maximum number of work groups per compute unit supported by the
+  /// subtarget and limited by given \p FlatWorkGroupSize.
+  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
+    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
+                                                  FlatWorkGroupSize);
+  }
+
+  /// \returns Minimum flat work group size supported by the subtarget.
+  unsigned getMinFlatWorkGroupSize() const {
+    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
+  }
+
+  /// \returns Maximum flat work group size supported by the subtarget.
+  unsigned getMaxFlatWorkGroupSize() const {
+    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
+  }
+
+  /// \returns Maximum number of waves per execution unit supported by the
+  /// subtarget and limited by given \p FlatWorkGroupSize.
+  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
+    return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
+                                             FlatWorkGroupSize);
+  }
+
+  /// \returns Minimum number of waves per execution unit supported by the
+  /// subtarget.
+  unsigned getMinWavesPerEU() const {
+    return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
+  }
+
+  unsigned getMaxWavesPerEU() const { return 10; }
+
+  /// Creates value range metadata on a workitemid.* intrinsic call or load.
+  bool makeLIDRangeMetadata(Instruction *I) const;
+
+  virtual ~AMDGPUCommonSubtarget() {}
+};
+
+class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo,
+                        public AMDGPUCommonSubtarget {
 public:
   enum Generation {
-    R600 = 0,
-    R700,
-    EVERGREEN,
-    NORTHERN_ISLANDS,
-    SOUTHERN_ISLANDS,
-    SEA_ISLANDS,
-    VOLCANIC_ISLANDS,
-    GFX9,
+    // Gap for R600 generations, so we can do comparisons between
+    // AMDGPUSubtarget and R600Subtarget.
+    SOUTHERN_ISLANDS = 4,
+    SEA_ISLANDS = 5,
+    VOLCANIC_ISLANDS = 6,
+    GFX9 = 7,
   };
 
   enum {
@@ -96,13 +255,20 @@ public:
     LLVMTrapHandlerRegValue = 1
   };
 
+private:
+  SIFrameLowering FrameLowering;
+
+  /// GlobalISel related APIs.
+  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
+  std::unique_ptr<InstructionSelector> InstSelector;
+  std::unique_ptr<LegalizerInfo> Legalizer;
+  std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
 protected:
   // Basic subtarget description.
   Triple TargetTriple;
-  Generation Gen;
+  unsigned Gen;
   unsigned IsaVersion;
-  unsigned WavefrontSize;
-  int LocalMemorySize;
   int LDSBankCount;
   unsigned MaxPrivateElementSize;
 
@@ -111,9 +277,7 @@ protected:
   bool HalfRate64Ops;
 
   // Dynamially set bits that enable features.
-  bool FP32Denormals;
   bool FP64FP16Denormals;
-  bool FPExceptions;
   bool DX10Clamp;
   bool FlatForGlobal;
   bool AutoWaitcntBeforeBarrier;
@@ -129,7 +293,6 @@ protected:
   // Used as options.
   bool EnableHugePrivateBuffer;
   bool EnableVGPRSpilling;
-  bool EnablePromoteAlloca;
   bool EnableLoadStoreOpt;
   bool EnableUnsafeDSOffsetFolding;
   bool EnableSIScheduler;
@@ -146,17 +309,13 @@ protected:
   bool GFX9Insts;
   bool SGPRInitBug;
   bool HasSMemRealTime;
-  bool Has16BitInsts;
   bool HasIntClamp;
-  bool HasVOP3PInsts;
-  bool HasMadMixInsts;
   bool HasFmaMixInsts;
   bool HasMovrel;
   bool HasVGPRIndexMode;
   bool HasScalarStores;
   bool HasScalarAtomics;
   bool HasInv2PiInlineImm;
-  bool HasSDWA;
   bool HasSDWAOmod;
   bool HasSDWAScalar;
   bool HasSDWASdst;
@@ -181,7 +340,6 @@ protected:
   // Dummy feature to use for assembler in tablegen.
   bool FeatureDisable;
 
-  InstrItineraryData InstrItins;
   SelectionDAGTargetInfo TSInfo;
   AMDGPUAS AS;
 
@@ -193,13 +351,30 @@ public:
   AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
                                                    StringRef GPU, StringRef FS);
 
-  const AMDGPUInstrInfo *getInstrInfo() const override = 0;
-  const AMDGPUFrameLowering *getFrameLowering() const override = 0;
-  const AMDGPUTargetLowering *getTargetLowering() const override = 0;
-  const AMDGPURegisterInfo *getRegisterInfo() const override = 0;
+  virtual const SIInstrInfo *getInstrInfo() const override = 0;
 
-  const InstrItineraryData *getInstrItineraryData() const override {
-    return &InstrItins;
+  const SIFrameLowering *getFrameLowering() const override {
+    return &FrameLowering;
+  }
+
+  virtual const SITargetLowering *getTargetLowering() const override = 0;
+
+  virtual const SIRegisterInfo *getRegisterInfo() const override = 0;
+
+  const CallLowering *getCallLowering() const override {
+    return CallLoweringInfo.get();
+  }
+
+  const InstructionSelector *getInstructionSelector() const override {
+    return InstSelector.get();
+  }
+
+  const LegalizerInfo *getLegalizerInfo() const override {
+    return Legalizer.get();
+  }
+
+  const RegisterBankInfo *getRegBankInfo() const override {
+    return RegBankInfo.get();
   }
 
   // Nothing implemented, just prevent crashes on use.
@@ -209,34 +384,18 @@ public:
 
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
-  bool isAmdHsaOS() const {
-    return TargetTriple.getOS() == Triple::AMDHSA;
-  }
-
   bool isMesa3DOS() const {
     return TargetTriple.getOS() == Triple::Mesa3D;
   }
 
-  bool isAmdPalOS() const {
-    return TargetTriple.getOS() == Triple::AMDPAL;
-  }
-
   Generation getGeneration() const {
-    return Gen;
-  }
-
-  unsigned getWavefrontSize() const {
-    return WavefrontSize;
+    return (Generation)Gen;
   }
 
   unsigned getWavefrontSizeLog2() const {
     return Log2_32(WavefrontSize);
   }
 
-  int getLocalMemorySize() const {
-    return LocalMemorySize;
-  }
-
   int getLDSBankCount() const {
     return LDSBankCount;
   }
@@ -249,18 +408,10 @@ public:
     return AS;
   }
 
-  bool has16BitInsts() const {
-    return Has16BitInsts;
-  }
-
   bool hasIntClamp() const {
     return HasIntClamp;
   }
 
-  bool hasVOP3PInsts() const {
-    return HasVOP3PInsts;
-  }
-
   bool hasFP64() const {
     return FP64;
   }
@@ -269,6 +420,10 @@ public:
     return MIMG_R128;
   }
 
+  bool hasHWFP64() const {
+    return FP64;
+  }
+
   bool hasFastFMAF32() const {
     return FastFMAF32;
   }
@@ -278,15 +433,15 @@ public:
   }
 
   bool hasAddr64() const {
-    return (getGeneration() < VOLCANIC_ISLANDS);
+    return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
   }
 
   bool hasBFE() const {
-    return (getGeneration() >= EVERGREEN);
+    return true;
   }
 
   bool hasBFI() const {
-    return (getGeneration() >= EVERGREEN);
+    return true;
   }
 
   bool hasBFM() const {
@@ -294,42 +449,23 @@ public:
   }
 
   bool hasBCNT(unsigned Size) const {
-    if (Size == 32)
-      return (getGeneration() >= EVERGREEN);
-
-    if (Size == 64)
-      return (getGeneration() >= SOUTHERN_ISLANDS);
-
-    return false;
-  }
-
-  bool hasMulU24() const {
-    return (getGeneration() >= EVERGREEN);
-  }
-
-  bool hasMulI24() const {
-    return (getGeneration() >= SOUTHERN_ISLANDS ||
-            hasCaymanISA());
+    return true;
   }
 
   bool hasFFBL() const {
-    return (getGeneration() >= EVERGREEN);
+    return true;
   }
 
   bool hasFFBH() const {
-    return (getGeneration() >= EVERGREEN);
+    return true;
   }
 
   bool hasMed3_16() const {
-    return getGeneration() >= GFX9;
+    return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
 
   bool hasMin3Max3_16() const {
-    return getGeneration() >= GFX9;
-  }
-
-  bool hasMadMixInsts() const {
-    return HasMadMixInsts;
+    return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
 
   bool hasFmaMixInsts() const {
@@ -337,15 +473,7 @@ public:
   }
 
   bool hasCARRY() const {
-    return (getGeneration() >= EVERGREEN);
-  }
-
-  bool hasBORROW() const {
-    return (getGeneration() >= EVERGREEN);
-  }
-
-  bool hasCaymanISA() const {
-    return CaymanISA;
+    return true;
   }
 
   bool hasFMA() const {
@@ -360,10 +488,6 @@ public:
     return EnableHugePrivateBuffer;
   }
 
-  bool isPromoteAllocaEnabled() const {
-    return EnablePromoteAlloca;
-  }
-
   bool unsafeDSOffsetFoldingEnabled() const {
     return EnableUnsafeDSOffsetFolding;
   }
@@ -377,20 +501,10 @@ public:
   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
                                            const Function &) const;
 
-  /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
-  /// the given LDS memory size is the only constraint.
-  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
-
-  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
-
   bool hasFP16Denormals() const {
     return FP64FP16Denormals;
   }
 
-  bool hasFP32Denormals() const {
-    return FP32Denormals;
-  }
-
   bool hasFP64Denormals() const {
     return FP64FP16Denormals;
   }
@@ -399,10 +513,6 @@ public:
     return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
 
-  bool hasFPExceptions() const {
-    return FPExceptions;
-  }
-
   bool enableDX10Clamp() const {
     return DX10Clamp;
   }
@@ -444,7 +554,7 @@ public:
   }
 
   bool hasApertureRegs() const {
-   return HasApertureRegs;
+    return HasApertureRegs;
   }
 
   bool isTrapHandlerEnabled() const {
@@ -510,14 +620,6 @@ public:
     return getGeneration() >= SEA_ISLANDS;
   }
 
-  bool hasFminFmaxLegacy() const {
-    return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
-  }
-
-  bool hasSDWA() const {
-    return HasSDWA;
-  }
-
   bool hasSDWAOmod() const {
     return HasSDWAOmod;
   }
@@ -556,10 +658,6 @@ public:
     return isAmdCodeObjectV2(F) ? 0 : 36;
   }
 
-  unsigned getAlignmentForImplicitArgPtr() const {
-    return isAmdHsaOS() ? 8 : 4;
-  }
-
   /// \returns Number of bytes of arguments that are passed to a shader or
   /// kernel in addition to the explicit ones declared for the function.
   unsigned getImplicitArgNumBytes(const Function &F) const {
@@ -588,134 +686,39 @@ public:
     return true;
   }
 
-  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
-  bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
+  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
+  bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
 
   /// \returns Number of execution units per compute unit supported by the
   /// subtarget.
   unsigned getEUsPerCU() const {
-    return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
-  }
-
-  /// \returns Maximum number of work groups per compute unit supported by the
-  /// subtarget and limited by given \p FlatWorkGroupSize.
-  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
-    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
-                                                  FlatWorkGroupSize);
+    return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
   }
 
   /// \returns Maximum number of waves per compute unit supported by the
   /// subtarget without any kind of limitation.
   unsigned getMaxWavesPerCU() const {
-    return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
+    return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
   }
 
   /// \returns Maximum number of waves per compute unit supported by the
   /// subtarget and limited by given \p FlatWorkGroupSize.
   unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
-    return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
+    return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
                                              FlatWorkGroupSize);
   }
 
-  /// \returns Minimum number of waves per execution unit supported by the
-  /// subtarget.
-  unsigned getMinWavesPerEU() const {
-    return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
-  }
-
   /// \returns Maximum number of waves per execution unit supported by the
   /// subtarget without any kind of limitation.
   unsigned getMaxWavesPerEU() const {
-    return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
-  }
-
-  /// \returns Maximum number of waves per execution unit supported by the
-  /// subtarget and limited by given \p FlatWorkGroupSize.
-  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
-    return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
-                                             FlatWorkGroupSize);
-  }
-
-  /// \returns Minimum flat work group size supported by the subtarget.
-  unsigned getMinFlatWorkGroupSize() const {
-    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
-  }
-
-  /// \returns Maximum flat work group size supported by the subtarget.
-  unsigned getMaxFlatWorkGroupSize() const {
-    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
+    return AMDGPU::IsaInfo::getMaxWavesPerEU();
   }
 
   /// \returns Number of waves per work group supported by the subtarget and
   /// limited by given \p FlatWorkGroupSize.
   unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
-    return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
-                                                 FlatWorkGroupSize);
-  }
-
-  /// \returns Default range flat work group size for a calling convention.
-  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
-
-  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
-  /// for function \p F, or minimum/maximum flat work group sizes explicitly
-  /// requested using "amdgpu-flat-work-group-size" attribute attached to
-  /// function \p F.
-  ///
-  /// \returns Subtarget's default values if explicitly requested values cannot
-  /// be converted to integer, or violate subtarget's specifications.
-  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
-
-  /// \returns Subtarget's default pair of minimum/maximum number of waves per
-  /// execution unit for function \p F, or minimum/maximum number of waves per
-  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
-  /// attached to function \p F.
-  ///
-  /// \returns Subtarget's default values if explicitly requested values cannot
-  /// be converted to integer, violate subtarget's specifications, or are not
-  /// compatible with minimum/maximum number of waves limited by flat work group
-  /// size, register usage, and/or lds usage.
-  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
-
-  /// Creates value range metadata on an workitemid.* inrinsic call or load.
-  bool makeLIDRangeMetadata(Instruction *I) const;
-};
-
-class R600Subtarget final : public AMDGPUSubtarget {
-private:
-  R600InstrInfo InstrInfo;
-  R600FrameLowering FrameLowering;
-  R600TargetLowering TLInfo;
-
-public:
-  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
-                const TargetMachine &TM);
-
-  const R600InstrInfo *getInstrInfo() const override {
-    return &InstrInfo;
-  }
-
-  const R600FrameLowering *getFrameLowering() const override {
-    return &FrameLowering;
-  }
-
-  const R600TargetLowering *getTargetLowering() const override {
-    return &TLInfo;
-  }
-
-  const R600RegisterInfo *getRegisterInfo() const override {
-    return &InstrInfo.getRegisterInfo();
-  }
-
-  bool hasCFAluBug() const {
-    return CFALUBug;
-  }
-
-  bool hasVertexCache() const {
-    return HasVertexCache;
-  }
-
-  short getTexVTXClauseSize() const {
-    return TexVTXClauseSize;
+    return AMDGPU::IsaInfo::getWavesPerWorkGroup(
+        MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
   }
 };
 
@@ -766,6 +769,8 @@ public:
   const SIRegisterInfo *getRegisterInfo() const override {
     return &InstrInfo.getRegisterInfo();
   }
+  // static wrappers
+  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
 
   // XXX - Why is this here if it isn't in the default pass set?
   bool enableEarlyIfConversion() const override {
@@ -775,7 +780,7 @@ public:
   void overrideSchedPolicy(MachineSchedPolicy &Policy,
                            unsigned NumRegionInstrs) const override;
 
-  bool isVGPRSpillingEnabled(const Function& F) const;
+  bool isVGPRSpillingEnabled(const Function &F) const;
 
   unsigned getMaxNumUserSGPRs() const {
     return 16;
@@ -860,16 +865,18 @@ public:
   unsigned getKernArgSegmentSize(const Function &F,
                                  unsigned ExplictArgBytes) const;
 
-  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
+  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
+  /// SGPRs
   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
 
-  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
+  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
+  /// VGPRs
   unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
 
   /// \returns true if the flat_scratch register should be initialized with the
   /// pointer to the wave's scratch memory rather than a size and offset.
   bool flatScratchIsPointer() const {
-    return getGeneration() >= GFX9;
+    return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
 
   /// \returns true if the machine has merged shaders in which s0-s7 are
@@ -880,35 +887,39 @@ public:
 
   /// \returns SGPR allocation granularity supported by the subtarget.
   unsigned getSGPRAllocGranule() const {
-    return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
+    return AMDGPU::IsaInfo::getSGPRAllocGranule(
+        MCSubtargetInfo::getFeatureBits());
   }
 
   /// \returns SGPR encoding granularity supported by the subtarget.
   unsigned getSGPREncodingGranule() const {
-    return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
+    return AMDGPU::IsaInfo::getSGPREncodingGranule(
+        MCSubtargetInfo::getFeatureBits());
   }
 
   /// \returns Total number of SGPRs supported by the subtarget.
   unsigned getTotalNumSGPRs() const {
-    return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
+    return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
   }
 
   /// \returns Addressable number of SGPRs supported by the subtarget.
   unsigned getAddressableNumSGPRs() const {
-    return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
+    return AMDGPU::IsaInfo::getAddressableNumSGPRs(
+        MCSubtargetInfo::getFeatureBits());
   }
 
   /// \returns Minimum number of SGPRs that meets the given number of waves per
   /// execution unit requirement supported by the subtarget.
   unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
-    return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
+    return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
+                                           WavesPerEU);
   }
 
   /// \returns Maximum number of SGPRs that meets the given number of waves per
   /// execution unit requirement supported by the subtarget.
   unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
-    return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
-                                           Addressable);
+    return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
+                                           WavesPerEU, Addressable);
   }
 
   /// \returns Reserved number of SGPRs for given function \p MF.
@@ -926,34 +937,39 @@ public:
 
   /// \returns VGPR allocation granularity supported by the subtarget.
   unsigned getVGPRAllocGranule() const {
-    return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
+    return AMDGPU::IsaInfo::getVGPRAllocGranule(
+        MCSubtargetInfo::getFeatureBits());
   }
 
   /// \returns VGPR encoding granularity supported by the subtarget.
   unsigned getVGPREncodingGranule() const {
-    return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
+    return AMDGPU::IsaInfo::getVGPREncodingGranule(
+        MCSubtargetInfo::getFeatureBits());
   }
 
   /// \returns Total number of VGPRs supported by the subtarget.
   unsigned getTotalNumVGPRs() const {
-    return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
+    return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
   }
 
   /// \returns Addressable number of VGPRs supported by the subtarget.
   unsigned getAddressableNumVGPRs() const {
-    return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
+    return AMDGPU::IsaInfo::getAddressableNumVGPRs(
+        MCSubtargetInfo::getFeatureBits());
   }
 
   /// \returns Minimum number of VGPRs that meets given number of waves per
   /// execution unit requirement supported by the subtarget.
   unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
-    return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
+    return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
+                                           WavesPerEU);
   }
 
   /// \returns Maximum number of VGPRs that meets given number of waves per
   /// execution unit requirement supported by the subtarget.
   unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
-    return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
+    return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
+                                           WavesPerEU);
   }
 
   /// \returns Maximum number of VGPRs that meets number of waves per execution
@@ -971,6 +987,127 @@ public:
       const override;
 };
 
+
+class R600Subtarget final : public R600GenSubtargetInfo,
+                            public AMDGPUCommonSubtarget {
+public:
+  enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 };
+
+private:
+  R600InstrInfo InstrInfo;
+  R600FrameLowering FrameLowering;
+  bool FMA;
+  bool CaymanISA;
+  bool CFALUBug;
+  bool DX10Clamp;
+  bool HasVertexCache;
+  bool R600ALUInst;
+  bool FP64;
+  short TexVTXClauseSize;
+  Generation Gen;
+  R600TargetLowering TLInfo;
+  InstrItineraryData InstrItins;
+  SelectionDAGTargetInfo TSInfo;
+  AMDGPUAS AS;
+
+public:
+  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
+                const TargetMachine &TM);
+
+  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+
+  const R600FrameLowering *getFrameLowering() const override {
+    return &FrameLowering;
+  }
+
+  const R600TargetLowering *getTargetLowering() const override {
+    return &TLInfo;
+  }
+
+  const R600RegisterInfo *getRegisterInfo() const override {
+    return &InstrInfo.getRegisterInfo();
+  }
+
+  const InstrItineraryData *getInstrItineraryData() const override {
+    return &InstrItins;
+  }
+
+  // Nothing implemented, just prevent crashes on use.
+  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
+    return &TSInfo;
+  }
+
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+  Generation getGeneration() const {
+    return Gen;
+  }
+
+  unsigned getStackAlignment() const {
+    return 4;
+  }
+
+  R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
+                                                 StringRef GPU, StringRef FS);
+
+  bool hasBFE() const {
+    return (getGeneration() >= EVERGREEN);
+  }
+
+  bool hasBFI() const {
+    return (getGeneration() >= EVERGREEN);
+  }
+
+  bool hasBCNT(unsigned Size) const {
+    if (Size == 32)
+      return (getGeneration() >= EVERGREEN);
+
+    return false;
+  }
+
+  bool hasBORROW() const {
+    return (getGeneration() >= EVERGREEN);
+  }
+
+  bool hasCARRY() const {
+    return (getGeneration() >= EVERGREEN);
+  }
+
+  bool hasCaymanISA() const {
+    return CaymanISA;
+  }
+
+  bool hasFFBL() const {
+    return (getGeneration() >= EVERGREEN);
+  }
+
+  bool hasFFBH() const {
+    return (getGeneration() >= EVERGREEN);
+  }
+
+  bool hasFMA() const { return FMA; }
+
+  unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
+    return 36;
+  }
+
+  bool hasCFAluBug() const { return CFALUBug; }
+
+  bool hasVertexCache() const { return HasVertexCache; }
+
+  short getTexVTXClauseSize() const { return TexVTXClauseSize; }
+
+  AMDGPUAS getAMDGPUAS() const { return AS; }
+
+  bool enableMachineScheduler() const override {
+    return true;
+  }
+
+  bool enableSubRegLiveness() const override {
+    return true;
+  }
+};
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
author	Tom Stellard <tstellar@redhat.com>	2018-06-28 23:47:12 +0000
committer	Tom Stellard <tstellar@redhat.com>	2018-06-28 23:47:12 +0000
commit	c5a154db48c3cd9e16b5c74977d506415414daf7 (patch)
tree	9f13c6c0c08d47bb47b4058de080226cfec8f739 /llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
parent	3702f9128779ffdd0f3b2c9db88c21379aaf5171 (diff)
download	bcm5719-llvm-c5a154db48c3cd9e16b5c74977d506415414daf7.tar.gz bcm5719-llvm-c5a154db48c3cd9e16b5c74977d506415414daf7.zip