Diffstat (limited to 'llvm/lib/Target')
63 files changed, 1854 insertions, 1508 deletions
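The bulk of this change splits the R600 pieces out of the shared AMDGPU target description: R600-only subtarget features, the CC_R600 calling convention, and the R600 predicate/pattern scaffolding leave AMDGPU.td and AMDGPUCallingConv.td; the definitions both targets still need move into the new AMDGPUFeatures.td; R600DAGToDAGISel now carries its own R600Subtarget and generated R600GenDAGISel.inc; and the subtarget hierarchy gains an AMDGPUCommonSubtarget base class holding the properties shared by the GCN (AMDGPUSubtarget) and R600 subtargets. IR-level passes such as AMDGPULowerIntrinsics and AMDGPUPromoteAlloca switch from TM.getSubtarget<AMDGPUSubtarget>(F) to the new AMDGPUCommonSubtarget::get(TM, F) helper, which dispatches on the target triple. Below is a minimal sketch, not part of the patch, of how such a caller looks after this change; the helper function itself is hypothetical, and it uses only accessors that appear in the AMDGPUCommonSubtarget interface further down in this diff.

  // Hypothetical helper, for illustration only. AMDGPUCommonSubtarget::get()
  // returns the shared interface for either the GCN (amdgcn) or R600 (r600)
  // subtarget, selected by the target triple.
  #include "AMDGPUSubtarget.h"
  #include "llvm/IR/Function.h"
  #include "llvm/Target/TargetMachine.h"

  static bool worthPromotingAlloca(const llvm::TargetMachine &TM,
                                   const llvm::Function &F) {
    const llvm::AMDGPUCommonSubtarget &ST =
        llvm::AMDGPUCommonSubtarget::get(TM, F);

    // These properties used to live on AMDGPUSubtarget and are now common to
    // both the GCN and R600 subtargets.
    if (!ST.isPromoteAllocaEnabled())
      return false;

    // LDS-backed promotion only makes sense if the device has local memory.
    return ST.getLocalMemorySize() > 0;
  }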
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 2c921f0088f..84ae355beb5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -9,23 +9,12 @@  include "llvm/TableGen/SearchableTable.td"  include "llvm/Target/Target.td" +include "AMDGPUFeatures.td"  //===------------------------------------------------------------===//  // Subtarget Features (device properties)  //===------------------------------------------------------------===// -def FeatureFP64 : SubtargetFeature<"fp64", -  "FP64", -  "true", -  "Enable double precision operations" ->; - -def FeatureFMA : SubtargetFeature<"fmaf", -  "FMA", -  "true", -  "Enable single precision FMA (not as fast as mul+add, but fused)" ->; -  def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",    "FastFMAF32",    "true", @@ -44,30 +33,6 @@ def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",    "Most fp64 instructions are half rate instead of quarter"  >; -def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", -  "R600ALUInst", -  "false", -  "Older version of ALU instructions encoding" ->; - -def FeatureVertexCache : SubtargetFeature<"HasVertexCache", -  "HasVertexCache", -  "true", -  "Specify use of dedicated vertex cache" ->; - -def FeatureCaymanISA : SubtargetFeature<"caymanISA", -  "CaymanISA", -  "true", -  "Use Cayman ISA" ->; - -def FeatureCFALUBug : SubtargetFeature<"cfalubug", -  "CFALUBug", -  "true", -  "GPU has CF_ALU bug" ->; -  def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",    "FlatAddressSpace",    "true", @@ -153,27 +118,6 @@ def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",    "VI SGPR initialization bug requiring a fixed SGPR allocation size"  >; -class SubtargetFeatureFetchLimit <string Value> : -                          SubtargetFeature <"fetch"#Value, -  "TexVTXClauseSize", -  Value, -  "Limit the maximum number of fetches in a clause to "#Value ->; - -def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">; -def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">; - -class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature< -  "wavefrontsize"#Value, -  "WavefrontSize", -  !cast<string>(Value), -  "The number of threads per wavefront" ->; - -def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>; -def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>; -def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>; -  class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <    "ldsbankcount"#Value,    "LDSBankCount", @@ -184,19 +128,6 @@ class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <  def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;  def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>; -class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature< -  "localmemorysize"#Value, -  "LocalMemorySize", -  !cast<string>(Value), -  "The size of local memory in bytes" ->; - -def FeatureGCN : SubtargetFeature<"gcn", -  "IsGCN", -  "true", -  "GCN or newer GPU" ->; -  def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",    "GCN3Encoding",    "true", @@ -369,12 +300,6 @@ def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",    [FeatureFP64FP16Denormals]  >; -def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp", -  "DX10Clamp", -  "true", -  "clamp modifier clamps NaNs to 0.0" ->; -  def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",    "FPExceptions",    "true", @@ -417,12 +342,6 @@ def FeatureDumpCodeLower : 
SubtargetFeature <"dumpcode",    "Dump MachineInstrs in the CodeEmitter"  >; -def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca", -  "EnablePromoteAlloca", -  "true", -  "Enable promote alloca pass" ->; -  // XXX - This should probably be removed once enabled by default  def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",    "EnableLoadStoreOpt", @@ -486,45 +405,29 @@ def FeatureDisable : SubtargetFeature<"",    "Dummy feature to disable assembler instructions"  >; -class SubtargetFeatureGeneration <string Value, -                                  list<SubtargetFeature> Implies> : -        SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value, -                          Value#" GPU generation", Implies>; - -def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>; -def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>; -def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>; - -def FeatureR600 : SubtargetFeatureGeneration<"R600", -  [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0] ->; - -def FeatureR700 : SubtargetFeatureGeneration<"R700", -  [FeatureFetchLimit16, FeatureLocalMemorySize0] ->; - -def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN", -  [FeatureFetchLimit16, FeatureLocalMemorySize32768] +def FeatureGCN : SubtargetFeature<"gcn", +  "IsGCN", +  "true", +  "GCN or newer GPU"  >; -def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS", -  [FeatureFetchLimit16, FeatureWavefrontSize64, -   FeatureLocalMemorySize32768] ->; +class AMDGPUSubtargetFeatureGeneration <string Value, +                                  list<SubtargetFeature> Implies> : +        SubtargetFeatureGeneration <Value, "AMDGPUSubtarget", Implies>; -def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS", +def FeatureSouthernIslands : AMDGPUSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",    [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,    FeatureWavefrontSize64, FeatureGCN,    FeatureLDSBankCount32, FeatureMovrel]  >; -def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", +def FeatureSeaIslands : AMDGPUSubtargetFeatureGeneration<"SEA_ISLANDS",    [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,    FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,    FeatureCIInsts, FeatureMovrel]  >; -def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", +def FeatureVolcanicIslands : AMDGPUSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",    [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,     FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,     FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, @@ -535,7 +438,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",    ]  >; -def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9", +def FeatureGFX9 : AMDGPUSubtargetFeatureGeneration<"GFX9",    [FeatureFP64, FeatureLocalMemorySize65536,     FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,     FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, @@ -738,8 +641,6 @@ def NullALU : InstrItinClass;  // Predicate helper class  //===----------------------------------------------------------------------===// -def TruePredicate : Predicate<"true">; -  def isSICI : Predicate<    "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"    "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" @@ -831,36 +732,15 @@ def HasDLInsts : 
Predicate<"Subtarget->hasDLInsts()">,  def EnableLateCFGStructurize : Predicate<    "EnableLateStructurizeCFG">; -// Exists to help track down where SubtargetPredicate isn't set rather -// than letting tablegen crash with an unhelpful error. -def InvalidPred : Predicate<"predicate not set on instruction or pattern">; - -class PredicateControl { -  Predicate SubtargetPredicate = InvalidPred; -  Predicate SIAssemblerPredicate = isSICI; -  Predicate VIAssemblerPredicate = isVI; -  list<Predicate> AssemblerPredicates = []; -  Predicate AssemblerPredicate = TruePredicate; -  list<Predicate> OtherPredicates = []; -  list<Predicate> Predicates = !listconcat([SubtargetPredicate, -                                            AssemblerPredicate], -                                            AssemblerPredicates, -                                            OtherPredicates); -} - -class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>, -  PredicateControl; - -  // Include AMDGPU TD files -include "R600Schedule.td" -include "R600Processors.td"  include "SISchedule.td"  include "GCNProcessors.td"  include "AMDGPUInstrInfo.td"  include "AMDGPUIntrinsics.td" +include "SIIntrinsics.td"  include "AMDGPURegisterInfo.td"  include "AMDGPURegisterBanks.td"  include "AMDGPUInstructions.td" +include "SIInstrInfo.td"  include "AMDGPUCallingConv.td"  include "AMDGPUSearchableTables.td" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index 79a64c6abb3..f6d7c1d2218 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -85,17 +85,6 @@ def RetCC_SI_Shader : CallingConv<[    ]>>  ]>; -// Calling convention for R600 -def CC_R600 : CallingConv<[ -  CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[ -    T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW, -    T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW, -    T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW, -    T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW, -    T30_XYZW, T31_XYZW, T32_XYZW -  ]>>> -]>; -  // Calling convention for compute kernels  def CC_AMDGPU_Kernel : CallingConv<[    CCCustom<"allocateKernArg"> @@ -165,9 +154,5 @@ def CC_AMDGPU : CallingConv<[     CCIf<"static_cast<const AMDGPUSubtarget&>"           "(State.getMachineFunction().getSubtarget()).getGeneration() >= "             "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C", -        CCDelegateTo<CC_AMDGPU_Func>>, -   CCIf<"static_cast<const AMDGPUSubtarget&>" -          "(State.getMachineFunction().getSubtarget()).getGeneration() < " -            "AMDGPUSubtarget::SOUTHERN_ISLANDS", -        CCDelegateTo<CC_R600>> +        CCDelegateTo<CC_AMDGPU_Func>>  ]>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td new file mode 100644 index 00000000000..b375cae9018 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td @@ -0,0 +1,60 @@ +//===-- AMDGPUFeatures.td - AMDGPU Feature Definitions -----*- tablegen -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +def FeatureFP64 : SubtargetFeature<"fp64", +  "FP64", +  "true", +  "Enable double precision operations" +>; + +def FeatureFMA : SubtargetFeature<"fmaf", +  "FMA", +  "true", +  "Enable single precision FMA (not as fast as mul+add, but fused)" +>; + +class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature< +  "localmemorysize"#Value, +  "LocalMemorySize", +  !cast<string>(Value), +  "The size of local memory in bytes" +>; + +def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>; +def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>; +def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>; + +class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature< +  "wavefrontsize"#Value, +  "WavefrontSize", +  !cast<string>(Value), +  "The number of threads per wavefront" +>; + +def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>; +def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>; +def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>; + +class SubtargetFeatureGeneration <string Value, string Subtarget, +                                  list<SubtargetFeature> Implies> : +        SubtargetFeature <Value, "Gen", Subtarget#"::"#Value, +                          Value#" GPU generation", Implies>; + +def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp", +  "DX10Clamp", +  "true", +  "clamp modifier clamps NaNs to 0.0" +>; + +def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca", +  "EnablePromoteAlloca", +  "true", +  "Enable promote alloca pass" +>; + diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index db6a837c2bb..49aacd414c7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -104,15 +104,11 @@ private:    bool isNoNanSrc(SDValue N) const;    bool isInlineImmediate(const SDNode *N) const; -  bool isConstantLoad(const MemSDNode *N, int cbID) const;    bool isUniformBr(const SDNode *N) const;    SDNode *glueCopyToM0(SDNode *N) const;    const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; -  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); -  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, -                                       SDValue& Offset);    virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);    virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);    bool isDSOffsetLegal(const SDValue &Base, unsigned Offset, @@ -227,9 +223,18 @@ protected:  };  class R600DAGToDAGISel : public AMDGPUDAGToDAGISel { +  const R600Subtarget *Subtarget; +  AMDGPUAS AMDGPUASI; + +  bool isConstantLoad(const MemSDNode *N, int cbID) const; +  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); +  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, +                                       SDValue& Offset);  public:    explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) : -      AMDGPUDAGToDAGISel(TM, OptLevel) {} +      AMDGPUDAGToDAGISel(TM, OptLevel) { +    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM); +      }    void Select(SDNode *N) override; @@ -237,6 +242,11 @@ public:                            SDValue &Offset) override;    bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,                            SDValue &Offset) override; + +  
bool runOnMachineFunction(MachineFunction &MF) override; +protected: +  // Include the pieces autogenerated from the target description. +#include "R600GenDAGISel.inc"  };  }  // end anonymous namespace @@ -280,8 +290,7 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {  }  bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { -  const SIInstrInfo *TII -    = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo(); +  const SIInstrInfo *TII = Subtarget->getInstrInfo();    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))      return TII->isInlineConstant(C->getAPIntValue()); @@ -637,16 +646,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {    SelectCode(N);  } -bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { -  if (!N->readMem()) -    return false; -  if (CbId == -1) -    return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS || -           N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT; - -  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; -} -  bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {    const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();    const Instruction *Term = BB->getTerminator(); @@ -662,26 +661,6 @@ StringRef AMDGPUDAGToDAGISel::getPassName() const {  // Complex Patterns  //===----------------------------------------------------------------------===// -bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, -                                                         SDValue& IntPtr) { -  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { -    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), -                                       true); -    return true; -  } -  return false; -} - -bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, -    SDValue& BaseReg, SDValue &Offset) { -  if (!isa<ConstantSDNode>(Addr)) { -    BaseReg = Addr; -    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); -    return true; -  } -  return false; -} -  bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,                                              SDValue &Offset) {    return false; @@ -693,11 +672,11 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,    SDLoc DL(Addr);    if ((C = dyn_cast<ConstantSDNode>(Addr))) { -    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); +    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);      Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);    } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&               (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { -    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); +    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);      Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);    } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { @@ -2160,6 +2139,41 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() {    } while (IsModified);  } +bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { +  Subtarget = &MF.getSubtarget<R600Subtarget>(); +  return SelectionDAGISel::runOnMachineFunction(MF); +} + +bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { +  if (!N->readMem()) +    return false; +  if (CbId == -1) +    return N->getAddressSpace() == 
AMDGPUASI.CONSTANT_ADDRESS || +           N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT; + +  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; +} + +bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, +                                                         SDValue& IntPtr) { +  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { +    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), +                                       true); +    return true; +  } +  return false; +} + +bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, +    SDValue& BaseReg, SDValue &Offset) { +  if (!isa<ConstantSDNode>(Addr)) { +    BaseReg = Addr; +    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); +    return true; +  } +  return false; +} +  void R600DAGToDAGISel::Select(SDNode *N) {    unsigned int Opc = N->getOpcode();    if (N->isMachineOpcode()) { @@ -2180,12 +2194,12 @@ void R600DAGToDAGISel::Select(SDNode *N) {      // pass. We want to avoid 128 bits copies as much as possible because they      // can't be bundled by our scheduler.      switch(NumVectorElts) { -    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; +    case 2: RegClassID = R600::R600_Reg64RegClassID; break;      case 4:        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) -        RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; +        RegClassID = R600::R600_Reg128VerticalRegClassID;        else -        RegClassID = AMDGPU::R600_Reg128RegClassID; +        RegClassID = R600::R600_Reg128RegClassID;        break;      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");      } @@ -2203,11 +2217,11 @@ bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,    SDLoc DL(Addr);    if ((C = dyn_cast<ConstantSDNode>(Addr))) { -    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); +    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);      Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);    } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&               (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { -    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); +    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);      Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);    } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { @@ -2238,7 +2252,7 @@ bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,               && isInt<16>(IMMOffset->getZExtValue())) {      Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),                                    SDLoc(CurDAG->getEntryNode()), -                                  AMDGPU::ZERO, MVT::i32); +                                  R600::ZERO, MVT::i32);      Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),                                         MVT::i32);      return true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 7a36ada1347..feb53016f7c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -155,7 +155,7 @@ unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {  }  AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, -                                       
    const AMDGPUSubtarget &STI) +                                           const AMDGPUCommonSubtarget &STI)      : TargetLowering(TM), Subtarget(&STI) {    AMDGPUASI = AMDGPU::getAMDGPUAS(TM);    // Lower floating point store/load to integer store/load to reduce the number @@ -330,10 +330,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,    setOperationAction(ISD::FLOG, MVT::f32, Custom);    setOperationAction(ISD::FLOG10, MVT::f32, Custom); -  if (Subtarget->has16BitInsts()) { -    setOperationAction(ISD::FLOG, MVT::f16, Custom); -    setOperationAction(ISD::FLOG10, MVT::f16, Custom); -  }    setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);    setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); @@ -341,10 +337,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,    setOperationAction(ISD::FREM, MVT::f32, Custom);    setOperationAction(ISD::FREM, MVT::f64, Custom); -  // v_mad_f32 does not support denormals according to some sources. -  if (!Subtarget->hasFP32Denormals()) -    setOperationAction(ISD::FMAD, MVT::f32, Legal); -    // Expand to fneg + fadd.    setOperationAction(ISD::FSUB, MVT::f64, Expand); @@ -359,19 +351,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom); -  if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { -    setOperationAction(ISD::FCEIL, MVT::f64, Custom); -    setOperationAction(ISD::FTRUNC, MVT::f64, Custom); -    setOperationAction(ISD::FRINT, MVT::f64, Custom); -    setOperationAction(ISD::FFLOOR, MVT::f64, Custom); -  } - -  if (!Subtarget->hasBFI()) { -    // fcopysign can be done in a single instruction with BFI. -    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); -    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); -  } -    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom); @@ -403,12 +382,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,      setOperationAction(ISD::SUBE, VT, Legal);    } -  if (!Subtarget->hasBCNT(32)) -    setOperationAction(ISD::CTPOP, MVT::i32, Expand); - -  if (!Subtarget->hasBCNT(64)) -    setOperationAction(ISD::CTPOP, MVT::i64, Expand); -    // The hardware supports 32-bit ROTR, but not ROTL.    setOperationAction(ISD::ROTL, MVT::i32, Expand);    setOperationAction(ISD::ROTL, MVT::i64, Expand); @@ -428,28 +401,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,    setOperationAction(ISD::SMAX, MVT::i32, Legal);    setOperationAction(ISD::UMAX, MVT::i32, Legal); -  if (Subtarget->hasFFBH()) -    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); - -  if (Subtarget->hasFFBL()) -    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); -    setOperationAction(ISD::CTTZ, MVT::i64, Custom);    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);    setOperationAction(ISD::CTLZ, MVT::i64, Custom);    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); -  // We only really have 32-bit BFE instructions (and 16-bit on VI). -  // -  // On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any -  // effort to match them now. We want this to be false for i64 cases when the -  // extraction isn't restricted to the upper or lower half. 
Ideally we would -  // have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that -  // span the midpoint are probably relatively rare, so don't worry about them -  // for now. -  if (Subtarget->hasBFE()) -    setHasExtractBitsInsn(true); -    static const MVT::SimpleValueType VectorIntTypes[] = {      MVT::v2i32, MVT::v4i32    }; @@ -554,11 +510,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,    // vector compares until that is fixed.    setHasMultipleConditionRegisters(true); -  // SI at least has hardware support for floating point exceptions, but no way -  // of using or handling them is implemented. They are also optional in OpenCL -  // (Section 7.3) -  setHasFloatingPointExceptions(Subtarget->hasFPExceptions()); -    PredictableSelectIsExpensive = false;    // We want to find all load dependencies for long chains of stores to enable @@ -781,7 +732,7 @@ bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const {      {        const LoadSDNode * L = dyn_cast<LoadSDNode>(N);        if (L->getMemOperand()->getAddrSpace() -      == Subtarget->getAMDGPUAS().CONSTANT_ADDRESS_32BIT) +      == AMDGPUASI.CONSTANT_ADDRESS_32BIT)          return true;        return false;      } @@ -4290,9 +4241,11 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(      switch (IID) {      case Intrinsic::amdgcn_mbcnt_lo:      case Intrinsic::amdgcn_mbcnt_hi: { +      const SISubtarget &ST = +          DAG.getMachineFunction().getSubtarget<SISubtarget>();        // These return at most the wavefront size - 1.        unsigned Size = Op.getValueType().getSizeInBits(); -      Known.Zero.setHighBits(Size - Subtarget->getWavefrontSizeLog2()); +      Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2());        break;      }      default: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 80b5c389f72..c23bf60396c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -23,11 +23,13 @@  namespace llvm {  class AMDGPUMachineFunction; -class AMDGPUSubtarget; +class AMDGPUCommonSubtarget;  struct ArgDescriptor;  class AMDGPUTargetLowering : public TargetLowering {  private: +  const AMDGPUCommonSubtarget *Subtarget; +    /// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been    /// legalized from a smaller type VT. 
Need to match pre-legalized type because    /// the generic legalization inserts the add/sub between the select and @@ -39,7 +41,6 @@ public:    static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);  protected: -  const AMDGPUSubtarget *Subtarget;    AMDGPUAS AMDGPUASI;    SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; @@ -124,7 +125,7 @@ protected:    void analyzeFormalArgumentsCompute(CCState &State,                                const SmallVectorImpl<ISD::InputArg> &Ins) const;  public: -  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI); +  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUCommonSubtarget &STI);    bool mayIgnoreSignedZero(SDValue Op) const {      if (getTargetMachine().Options.NoSignedZerosFPMath) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp index 94d6d31ec74..949a3ce6ada 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -8,7 +8,7 @@  //===----------------------------------------------------------------------===//  //  /// \file -/// Implementation of the TargetInstrInfo class that is common to all +/// \brief Implementation of the TargetInstrInfo class that is common to all  /// AMD GPUs.  //  //===----------------------------------------------------------------------===// @@ -23,107 +23,11 @@  using namespace llvm; -#define GET_INSTRINFO_CTOR_DTOR -#include "AMDGPUGenInstrInfo.inc" - -namespace llvm { -namespace AMDGPU { -#define GET_D16ImageDimIntrinsics_IMPL -#define GET_ImageDimIntrinsicTable_IMPL -#define GET_RsrcIntrinsics_IMPL -#include "AMDGPUGenSearchableTables.inc" -} -} -  // Pin the vtable to this file. -void AMDGPUInstrInfo::anchor() {} - -AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) -  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), -    ST(ST), -    AMDGPUASI(ST.getAMDGPUAS()) {} - -// FIXME: This behaves strangely. If, for example, you have 32 load + stores, -// the first 16 loads will be interleaved with the stores, and the next 16 will -// be clustered as expected. It should really split into 2 16 store batches. -// -// Loads are clustered until this returns false, rather than trying to schedule -// groups of stores. This also means we have to deal with saying different -// address space loads should be clustered, and ones which might cause bank -// conflicts. -// -// This might be deprecated so it might not be worth that much effort to fix. -bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, -                                              int64_t Offset0, int64_t Offset1, -                                              unsigned NumLoads) const { -  assert(Offset1 > Offset0 && -         "Second offset should be larger than first offset!"); -  // If we have less than 16 loads in a row, and the offsets are within 64 -  // bytes, then schedule together. - -  // A cacheline is 64 bytes (for global memory). 
-  return (NumLoads <= 16 && (Offset1 - Offset0) < 64); -} - -// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td -enum SIEncodingFamily { -  SI = 0, -  VI = 1, -  SDWA = 2, -  SDWA9 = 3, -  GFX80 = 4, -  GFX9 = 5 -}; +//void AMDGPUInstrInfo::anchor() {} -static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) { -  switch (ST.getGeneration()) { -  case AMDGPUSubtarget::SOUTHERN_ISLANDS: -  case AMDGPUSubtarget::SEA_ISLANDS: -    return SIEncodingFamily::SI; -  case AMDGPUSubtarget::VOLCANIC_ISLANDS: -  case AMDGPUSubtarget::GFX9: -    return SIEncodingFamily::VI; +AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) { } -  // FIXME: This should never be called for r600 GPUs. -  case AMDGPUSubtarget::R600: -  case AMDGPUSubtarget::R700: -  case AMDGPUSubtarget::EVERGREEN: -  case AMDGPUSubtarget::NORTHERN_ISLANDS: -    return SIEncodingFamily::SI; -  } - -  llvm_unreachable("Unknown subtarget generation!"); -} - -int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const { -  SIEncodingFamily Gen = subtargetEncodingFamily(ST); - -  if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 && -    ST.getGeneration() >= AMDGPUSubtarget::GFX9) -    Gen = SIEncodingFamily::GFX9; - -  if (get(Opcode).TSFlags & SIInstrFlags::SDWA) -    Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9 -                                                      : SIEncodingFamily::SDWA; -  // Adjust the encoding family to GFX80 for D16 buffer instructions when the -  // subtarget has UnpackedD16VMem feature. -  // TODO: remove this when we discard GFX80 encoding. -  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf)) -    Gen = SIEncodingFamily::GFX80; - -  int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); - -  // -1 means that Opcode is already a native instruction. -  if (MCOp == -1) -    return Opcode; - -  // (uint16_t)-1 means that Opcode is a pseudo instruction that has -  // no encoding in the given subtarget generation. -  if (MCOp == (uint16_t)-1) -    return -1; - -  return MCOp; -}  // TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.  bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h index 48f84a4c177..c12124ea07c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -20,10 +20,6 @@  #include "Utils/AMDGPUBaseInfo.h"  #include "llvm/CodeGen/TargetInstrInfo.h" -#define GET_INSTRINFO_HEADER -#include "AMDGPUGenInstrInfo.inc" -#undef GET_INSTRINFO_HEADER -  namespace llvm {  class AMDGPUSubtarget; @@ -31,26 +27,10 @@ class MachineFunction;  class MachineInstr;  class MachineInstrBuilder; -class AMDGPUInstrInfo : public AMDGPUGenInstrInfo { -private: -  const AMDGPUSubtarget &ST; - -  virtual void anchor(); -protected: -  AMDGPUAS AMDGPUASI; - +class AMDGPUInstrInfo {  public:    explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st); -  bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, -                               int64_t Offset1, int64_t Offset2, -                               unsigned NumLoads) const override; - -  /// Return a target-specific opcode if Opcode is a pseudo instruction. -  /// Return -1 if the target-specific opcode for the pseudo instruction does -  /// not exist. If Opcode is not a pseudo instruction, this is identity. 
-  int pseudoToMCOpcode(int Opcode) const; -    static bool isUniformMMO(const MachineMemOperand *MMO);  }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index c474a85595b..c73f334da86 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -42,6 +42,47 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm = "",    field bits<32> Inst = 0xffffffff;  } +//===---------------------------------------------------------------------===// +// Return instruction +//===---------------------------------------------------------------------===// + +class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern> +: Instruction { + +     let Namespace = "AMDGPU"; +     dag OutOperandList = outs; +     dag InOperandList = ins; +     let Pattern = pattern; +     let AsmString = !strconcat(asmstr, "\n"); +     let isPseudo = 1; +     let Itinerary = NullALU; +     bit hasIEEEFlag = 0; +     bit hasZeroOpFlag = 0; +     let mayLoad = 0; +     let mayStore = 0; +     let hasSideEffects = 0; +     let isCodeGenOnly = 1; +} + +def TruePredicate : Predicate<"true">; + +// Exists to help track down where SubtargetPredicate isn't set rather +// than letting tablegen crash with an unhelpful error. +def InvalidPred : Predicate<"predicate not set on instruction or pattern">; + +class PredicateControl { +  Predicate SubtargetPredicate = InvalidPred; +  list<Predicate> AssemblerPredicates = []; +  Predicate AssemblerPredicate = TruePredicate; +  list<Predicate> OtherPredicates = []; +  list<Predicate> Predicates = !listconcat([SubtargetPredicate, +                                            AssemblerPredicate], +                                            AssemblerPredicates, +                                            OtherPredicates); +} +class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>, +      PredicateControl; +  def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">;  def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">;  def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">; @@ -94,12 +135,6 @@ def brtarget   : Operand<OtherVT>;  // Misc. 
PatFrags  //===----------------------------------------------------------------------===// -class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag< -  (ops node:$src0), -  (op $src0), -  [{ return N->hasOneUse(); }] ->; -  class HasOneUseBinOp<SDPatternOperator op> : PatFrag<    (ops node:$src0, node:$src1),    (op $src0, $src1), @@ -112,8 +147,6 @@ class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<    [{ return N->hasOneUse(); }]  >; -def trunc_oneuse : HasOneUseUnaryOp<trunc>; -  let Properties = [SDNPCommutative, SDNPAssociative] in {  def smax_oneuse : HasOneUseBinOp<smax>;  def smin_oneuse : HasOneUseBinOp<smin>; @@ -240,6 +273,37 @@ def COND_NULL : PatLeaf <    [{(void)N; return false;}]  >; +//===----------------------------------------------------------------------===// +// PatLeafs for Texture Constants +//===----------------------------------------------------------------------===// + +def TEX_ARRAY : PatLeaf< +  (imm), +  [{uint32_t TType = (uint32_t)N->getZExtValue(); +    return TType == 9 || TType == 10 || TType == 16; +  }] +>; + +def TEX_RECT : PatLeaf< +  (imm), +  [{uint32_t TType = (uint32_t)N->getZExtValue(); +    return TType == 5; +  }] +>; + +def TEX_SHADOW : PatLeaf< +  (imm), +  [{uint32_t TType = (uint32_t)N->getZExtValue(); +    return (TType >= 6 && TType <= 8) || TType == 13; +  }] +>; + +def TEX_SHADOW_ARRAY : PatLeaf< +  (imm), +  [{uint32_t TType = (uint32_t)N->getZExtValue(); +    return TType == 11 || TType == 12 || TType == 17; +  }] +>;  //===----------------------------------------------------------------------===//  // Load/Store Pattern Fragments @@ -769,11 +833,3 @@ class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat <    (AMDGPUrcp (fsqrt vt:$src)),    (RsqInst $src)  >; - -include "R600Instructions.td" -include "R700Instructions.td" -include "EvergreenInstructions.td" -include "CaymanInstructions.td" - -include "SIInstrInfo.td" - diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td index 63d661ad891..230a0462850 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td @@ -14,5 +14,3 @@  let TargetPrefix = "AMDGPU", isTarget = 1 in {    def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;  } - -include "SIIntrinsics.td" diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp index 7e0e9802c0e..3cb3129706f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp @@ -117,7 +117,6 @@ bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {      return false;    const TargetMachine &TM = TPC->getTM<TargetMachine>(); -  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(F);    bool Changed = false;    for (auto *U : F.users()) { @@ -125,7 +124,7 @@ bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {      if (!CI)        continue; -    Changed |= ST.makeLIDRangeMetadata(CI); +    Changed |= AMDGPUCommonSubtarget::get(TM, F).makeLIDRangeMetadata(CI);    }    return Changed;  } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 2f83e606047..ffe34831623 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -152,7 +152,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {    IsAMDGCN = TT.getArch() == Triple::amdgcn;    IsAMDHSA = TT.getOS() == Triple::AMDHSA; 
-  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F); +  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);    if (!ST.isPromoteAllocaEnabled())      return false; @@ -174,8 +174,8 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {  std::pair<Value *, Value *>  AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) { -  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>( -                                *Builder.GetInsertBlock()->getParent()); +  const Function &F = *Builder.GetInsertBlock()->getParent(); +  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);    if (!IsAMDHSA) {      Function *LocalSizeYFn @@ -261,8 +261,8 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {  }  Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) { -  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>( -                                *Builder.GetInsertBlock()->getParent()); +  const AMDGPUCommonSubtarget &ST = +      AMDGPUCommonSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent());    Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;    switch (N) { @@ -602,7 +602,7 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(  bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {    FunctionType *FTy = F.getFunctionType(); -  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F); +  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);    // If the function has any arguments in the local address space, then it's    // possible these arguments require the entire local memory space, so @@ -729,8 +729,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {    if (!SufficientLDS)      return false; -  const AMDGPUSubtarget &ST = -    TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction); +  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, ContainingFunction);    unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;    const DataLayout &DL = Mod->getDataLayout(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td index 3bbcba826f6..ceabae52441 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.td @@ -19,5 +19,4 @@ foreach Index = 0-15 in {  } -include "R600RegisterInfo.td"  include "SIRegisterInfo.td" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index b632298ba17..be775a1ae6b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -23,6 +23,7 @@  #include "MCTargetDesc/AMDGPUMCTargetDesc.h"  #include "llvm/ADT/SmallString.h"  #include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/MC/MCSubtargetInfo.h"  #include "llvm/IR/MDBuilder.h"  #include "llvm/CodeGen/TargetFrameLowering.h"  #include <algorithm> @@ -34,9 +35,32 @@ using namespace llvm;  #define GET_SUBTARGETINFO_TARGET_DESC  #define GET_SUBTARGETINFO_CTOR  #include "AMDGPUGenSubtargetInfo.inc" +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "R600GenSubtargetInfo.inc"  AMDGPUSubtarget::~AMDGPUSubtarget() = default; +R600Subtarget & +R600Subtarget::initializeSubtargetDependencies(const Triple &TT, +                                               StringRef GPU, StringRef FS) { +  SmallString<256> FullFS("+promote-alloca,+dx10-clamp,"); +  FullFS += FS; +  ParseSubtargetFeatures(GPU, 
FullFS); + +  // FIXME: I don't think think Evergreen has any useful support for +  // denormals, but should be checked. Should we issue a warning somewhere +  // if someone tries to enable these? +  if (getGeneration() <= R600Subtarget::NORTHERN_ISLANDS) { +    FP32Denormals = false; +  } + +  HasMulU24 = getGeneration() >= EVERGREEN; +  HasMulI24 = hasCaymanISA(); + +  return *this; +} +  AMDGPUSubtarget &  AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,                                                   StringRef GPU, StringRef FS) { @@ -93,26 +117,44 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,        HasMovrel = true;    } +  HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; +    return *this;  } +AMDGPUCommonSubtarget::AMDGPUCommonSubtarget(const Triple &TT, +                                             const FeatureBitset &FeatureBits) : +  TargetTriple(TT), +  SubtargetFeatureBits(FeatureBits), +  Has16BitInsts(false), +  HasMadMixInsts(false), +  FP32Denormals(false), +  FPExceptions(false), +  HasSDWA(false), +  HasVOP3PInsts(false), +  HasMulI24(true), +  HasMulU24(true), +  HasFminFmaxLegacy(true), +  EnablePromoteAlloca(false), +  LocalMemorySize(0), +  WavefrontSize(0) +  { } +  AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, -                                 const TargetMachine &TM) -  : AMDGPUGenSubtargetInfo(TT, GPU, FS), +                                 const TargetMachine &TM) : +    AMDGPUGenSubtargetInfo(TT, GPU, FS), +    AMDGPUCommonSubtarget(TT, getFeatureBits()), +    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),      TargetTriple(TT), -    Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600), +    Gen(SOUTHERN_ISLANDS),      IsaVersion(ISAVersion0_0_0), -    WavefrontSize(0), -    LocalMemorySize(0),      LDSBankCount(0),      MaxPrivateElementSize(0),      FastFMAF32(false),      HalfRate64Ops(false), -    FP32Denormals(false),      FP64FP16Denormals(false), -    FPExceptions(false),      DX10Clamp(false),      FlatForGlobal(false),      AutoWaitcntBeforeBarrier(false), @@ -128,7 +170,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,      EnableHugePrivateBuffer(false),      EnableVGPRSpilling(false), -    EnablePromoteAlloca(false),      EnableLoadStoreOpt(false),      EnableUnsafeDSOffsetFolding(false),      EnableSIScheduler(false), @@ -136,25 +177,18 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,      DumpCode(false),      FP64(false), -    FMA(false), -    MIMG_R128(false), -    IsGCN(false),      GCN3Encoding(false),      CIInsts(false),      GFX9Insts(false),      SGPRInitBug(false),      HasSMemRealTime(false), -    Has16BitInsts(false),      HasIntClamp(false), -    HasVOP3PInsts(false), -    HasMadMixInsts(false),      HasFmaMixInsts(false),      HasMovrel(false),      HasVGPRIndexMode(false),      HasScalarStores(false),      HasScalarAtomics(false),      HasInv2PiInlineImm(false), -    HasSDWA(false),      HasSDWAOmod(false),      HasSDWAScalar(false),      HasSDWASdst(false), @@ -170,20 +204,14 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,      AddNoCarryInsts(false),      HasUnpackedD16VMem(false), -    R600ALUInst(false), -    CaymanISA(false), -    CFALUBug(false), -    HasVertexCache(false), -    TexVTXClauseSize(0),      ScalarizeGlobal(false), -    FeatureDisable(false), -    
InstrItins(getInstrItineraryForCPU(GPU)) { +    FeatureDisable(false) {    AS = AMDGPU::getAMDGPUAS(TT);    initializeSubtargetDependencies(TT, GPU, FS);  } -unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves, +unsigned AMDGPUCommonSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,    const Function &F) const {    if (NWaves == 1)      return getLocalMemorySize(); @@ -193,7 +221,7 @@ unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,    return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;  } -unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes, +unsigned AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,    const Function &F) const {    unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;    unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize); @@ -206,13 +234,13 @@ unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,  }  unsigned -AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const { +AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {    const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();    return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());  }  std::pair<unsigned, unsigned> -AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const { +AMDGPUCommonSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {    switch (CC) {    case CallingConv::AMDGPU_CS:    case CallingConv::AMDGPU_KERNEL: @@ -230,7 +258,7 @@ AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {    }  } -std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes( +std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getFlatWorkGroupSizes(    const Function &F) const {    // FIXME: 1024 if function.    // Default minimum/maximum flat work group sizes. @@ -260,7 +288,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(    return Requested;  } -std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU( +std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getWavesPerEU(    const Function &F) const {    // Default minimum/maximum number of waves per execution unit.    
std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU()); @@ -308,7 +336,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(    return Requested;  } -bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const { +bool AMDGPUCommonSubtarget::makeLIDRangeMetadata(Instruction *I) const {    Function *Kernel = I->getParent()->getParent();    unsigned MinSize = 0;    unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second; @@ -372,10 +400,22 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {  R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,                               const TargetMachine &TM) : -  AMDGPUSubtarget(TT, GPU, FS, TM), +  R600GenSubtargetInfo(TT, GPU, FS), +  AMDGPUCommonSubtarget(TT, getFeatureBits()),    InstrInfo(*this),    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), -  TLInfo(TM, *this) {} +  FMA(false), +  CaymanISA(false), +  CFALUBug(false), +  DX10Clamp(false), +  HasVertexCache(false), +  R600ALUInst(false), +  FP64(false), +  TexVTXClauseSize(0), +  Gen(R600), +  TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)), +  InstrItins(getInstrItineraryForCPU(GPU)), +  AS (AMDGPU::getAMDGPUAS(TT)) { }  SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,                           const GCNTargetMachine &TM) @@ -619,3 +659,17 @@ void SISubtarget::getPostRAMutations(      std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {    Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));  } + +const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const MachineFunction &MF) { +  if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn) +    return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<AMDGPUSubtarget>()); +  else +    return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<R600Subtarget>()); +} + +const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const TargetMachine &TM, const Function &F) { +  if (TM.getTargetTriple().getArch() == Triple::amdgcn) +    return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<AMDGPUSubtarget>(F)); +  else +    return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<R600Subtarget>(F)); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 333b99f9ced..9c8b82c2834 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -39,22 +39,181 @@  #define GET_SUBTARGETINFO_HEADER  #include "AMDGPUGenSubtargetInfo.inc" +#define GET_SUBTARGETINFO_HEADER +#include "R600GenSubtargetInfo.inc"  namespace llvm {  class StringRef; -class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { +class AMDGPUCommonSubtarget { +private: +  Triple TargetTriple; + +protected: +  const FeatureBitset &SubtargetFeatureBits; +  bool Has16BitInsts; +  bool HasMadMixInsts; +  bool FP32Denormals; +  bool FPExceptions; +  bool HasSDWA; +  bool HasVOP3PInsts; +  bool HasMulI24; +  bool HasMulU24; +  bool HasFminFmaxLegacy; +  bool EnablePromoteAlloca; +  int LocalMemorySize; +  unsigned WavefrontSize; + +public: +  AMDGPUCommonSubtarget(const Triple &TT, const FeatureBitset &FeatureBits); + +  static const AMDGPUCommonSubtarget &get(const MachineFunction &MF); +  static const AMDGPUCommonSubtarget &get(const TargetMachine &TM, +                                          const Function &F); + +  /// \returns Default range flat work group size for a calling convention. 
+  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; + +  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes +  /// for function \p F, or minimum/maximum flat work group sizes explicitly +  /// requested using "amdgpu-flat-work-group-size" attribute attached to +  /// function \p F. +  /// +  /// \returns Subtarget's default values if explicitly requested values cannot +  /// be converted to integer, or violate subtarget's specifications. +  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const; + +  /// \returns Subtarget's default pair of minimum/maximum number of waves per +  /// execution unit for function \p F, or minimum/maximum number of waves per +  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute +  /// attached to function \p F. +  /// +  /// \returns Subtarget's default values if explicitly requested values cannot +  /// be converted to integer, violate subtarget's specifications, or are not +  /// compatible with minimum/maximum number of waves limited by flat work group +  /// size, register usage, and/or lds usage. +  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const; + +  /// Return the amount of LDS that can be used that will not restrict the +  /// occupancy lower than WaveCount. +  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, +                                           const Function &) const; + +  /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if +  /// the given LDS memory size is the only constraint. +  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; + +  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; + +  bool isAmdHsaOS() const { +    return TargetTriple.getOS() == Triple::AMDHSA; +  } + +  bool isAmdPalOS() const { +    return TargetTriple.getOS() == Triple::AMDPAL; +  } + +  bool has16BitInsts() const { +    return Has16BitInsts; +  } + +  bool hasMadMixInsts() const { +    return HasMadMixInsts; +  } + +  bool hasFP32Denormals() const { +    return FP32Denormals; +  } + +  bool hasFPExceptions() const { +    return FPExceptions; +  } + +  bool hasSDWA() const { +    return HasSDWA; +  } + +  bool hasVOP3PInsts() const { +    return HasVOP3PInsts; +  } + +  bool hasMulI24() const { +    return HasMulI24; +  } + +  bool hasMulU24() const { +    return HasMulU24; +  } + +  bool hasFminFmaxLegacy() const { +    return HasFminFmaxLegacy; +  } + +  bool isPromoteAllocaEnabled() const { +    return EnablePromoteAlloca; +  } + +  unsigned getWavefrontSize() const { +    return WavefrontSize; +  } + +  int getLocalMemorySize() const { +    return LocalMemorySize; +  } + +  unsigned getAlignmentForImplicitArgPtr() const { +    return isAmdHsaOS() ? 8 : 4; +  } + +  /// \returns Maximum number of work groups per compute unit supported by the +  /// subtarget and limited by given \p FlatWorkGroupSize. +  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { +    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits, +                                                  FlatWorkGroupSize); +  } + +  /// \returns Minimum flat work group size supported by the subtarget. +  unsigned getMinFlatWorkGroupSize() const { +    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits); +  } + +  /// \returns Maximum flat work group size supported by the subtarget. 
+  unsigned getMaxFlatWorkGroupSize() const { +    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits); +  } + +  /// \returns Maximum number of waves per execution unit supported by the +  /// subtarget and limited by given \p FlatWorkGroupSize. +  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const { +    return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits, +                                             FlatWorkGroupSize); +  } + +  /// \returns Minimum number of waves per execution unit supported by the +  /// subtarget. +  unsigned getMinWavesPerEU() const { +    return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits); +  } + +  unsigned getMaxWavesPerEU() const { return 10; } + +  /// Creates value range metadata on an workitemid.* inrinsic call or load. +  bool makeLIDRangeMetadata(Instruction *I) const; + +  virtual ~AMDGPUCommonSubtarget() {} +}; + +class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo, +                        public AMDGPUCommonSubtarget {  public:    enum Generation { -    R600 = 0, -    R700, -    EVERGREEN, -    NORTHERN_ISLANDS, -    SOUTHERN_ISLANDS, -    SEA_ISLANDS, -    VOLCANIC_ISLANDS, -    GFX9, +    // Gap for R600 generations, so we can do comparisons between +    // AMDGPUSubtarget and r600Subtarget. +    SOUTHERN_ISLANDS = 4, +    SEA_ISLANDS = 5, +    VOLCANIC_ISLANDS = 6, +    GFX9 = 7,    };    enum { @@ -96,13 +255,20 @@ public:      LLVMTrapHandlerRegValue = 1    }; +private: +  SIFrameLowering FrameLowering; + +  /// GlobalISel related APIs. +  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; +  std::unique_ptr<InstructionSelector> InstSelector; +  std::unique_ptr<LegalizerInfo> Legalizer; +  std::unique_ptr<RegisterBankInfo> RegBankInfo; +  protected:    // Basic subtarget description.    Triple TargetTriple; -  Generation Gen; +  unsigned Gen;    unsigned IsaVersion; -  unsigned WavefrontSize; -  int LocalMemorySize;    int LDSBankCount;    unsigned MaxPrivateElementSize; @@ -111,9 +277,7 @@ protected:    bool HalfRate64Ops;    // Dynamially set bits that enable features. -  bool FP32Denormals;    bool FP64FP16Denormals; -  bool FPExceptions;    bool DX10Clamp;    bool FlatForGlobal;    bool AutoWaitcntBeforeBarrier; @@ -129,7 +293,6 @@ protected:    // Used as options.    bool EnableHugePrivateBuffer;    bool EnableVGPRSpilling; -  bool EnablePromoteAlloca;    bool EnableLoadStoreOpt;    bool EnableUnsafeDSOffsetFolding;    bool EnableSIScheduler; @@ -146,17 +309,13 @@ protected:    bool GFX9Insts;    bool SGPRInitBug;    bool HasSMemRealTime; -  bool Has16BitInsts;    bool HasIntClamp; -  bool HasVOP3PInsts; -  bool HasMadMixInsts;    bool HasFmaMixInsts;    bool HasMovrel;    bool HasVGPRIndexMode;    bool HasScalarStores;    bool HasScalarAtomics;    bool HasInv2PiInlineImm; -  bool HasSDWA;    bool HasSDWAOmod;    bool HasSDWAScalar;    bool HasSDWASdst; @@ -181,7 +340,6 @@ protected:    // Dummy feature to use for assembler in tablegen.    
bool FeatureDisable; -  InstrItineraryData InstrItins;    SelectionDAGTargetInfo TSInfo;    AMDGPUAS AS; @@ -193,13 +351,30 @@ public:    AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,                                                     StringRef GPU, StringRef FS); -  const AMDGPUInstrInfo *getInstrInfo() const override = 0; -  const AMDGPUFrameLowering *getFrameLowering() const override = 0; -  const AMDGPUTargetLowering *getTargetLowering() const override = 0; -  const AMDGPURegisterInfo *getRegisterInfo() const override = 0; +  virtual const SIInstrInfo *getInstrInfo() const override = 0; -  const InstrItineraryData *getInstrItineraryData() const override { -    return &InstrItins; +  const SIFrameLowering *getFrameLowering() const override { +    return &FrameLowering; +  } + +  virtual const SITargetLowering *getTargetLowering() const override = 0; + +  virtual const SIRegisterInfo *getRegisterInfo() const override = 0; + +  const CallLowering *getCallLowering() const override { +    return CallLoweringInfo.get(); +  } + +  const InstructionSelector *getInstructionSelector() const override { +    return InstSelector.get(); +  } + +  const LegalizerInfo *getLegalizerInfo() const override { +    return Legalizer.get(); +  } + +  const RegisterBankInfo *getRegBankInfo() const override { +    return RegBankInfo.get();    }    // Nothing implemented, just prevent crashes on use. @@ -209,34 +384,18 @@ public:    void ParseSubtargetFeatures(StringRef CPU, StringRef FS); -  bool isAmdHsaOS() const { -    return TargetTriple.getOS() == Triple::AMDHSA; -  } -    bool isMesa3DOS() const {      return TargetTriple.getOS() == Triple::Mesa3D;    } -  bool isAmdPalOS() const { -    return TargetTriple.getOS() == Triple::AMDPAL; -  } -    Generation getGeneration() const { -    return Gen; -  } - -  unsigned getWavefrontSize() const { -    return WavefrontSize; +    return (Generation)Gen;    }    unsigned getWavefrontSizeLog2() const {      return Log2_32(WavefrontSize);    } -  int getLocalMemorySize() const { -    return LocalMemorySize; -  } -    int getLDSBankCount() const {      return LDSBankCount;    } @@ -249,18 +408,10 @@ public:      return AS;    } -  bool has16BitInsts() const { -    return Has16BitInsts; -  } -    bool hasIntClamp() const {      return HasIntClamp;    } -  bool hasVOP3PInsts() const { -    return HasVOP3PInsts; -  } -    bool hasFP64() const {      return FP64;    } @@ -269,6 +420,10 @@ public:      return MIMG_R128;    } +  bool hasHWFP64() const { +    return FP64; +  } +    bool hasFastFMAF32() const {      return FastFMAF32;    } @@ -278,15 +433,15 @@ public:    }    bool hasAddr64() const { -    return (getGeneration() < VOLCANIC_ISLANDS); +    return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);    }    bool hasBFE() const { -    return (getGeneration() >= EVERGREEN); +    return true;    }    bool hasBFI() const { -    return (getGeneration() >= EVERGREEN); +    return true;    }    bool hasBFM() const { @@ -294,42 +449,23 @@ public:    }    bool hasBCNT(unsigned Size) const { -    if (Size == 32) -      return (getGeneration() >= EVERGREEN); - -    if (Size == 64) -      return (getGeneration() >= SOUTHERN_ISLANDS); - -    return false; -  } - -  bool hasMulU24() const { -    return (getGeneration() >= EVERGREEN); -  } - -  bool hasMulI24() const { -    return (getGeneration() >= SOUTHERN_ISLANDS || -            hasCaymanISA()); +    return true;    }    bool hasFFBL() const { -    return (getGeneration() >= EVERGREEN); +    return 
true;    }    bool hasFFBH() const { -    return (getGeneration() >= EVERGREEN); +    return true;    }    bool hasMed3_16() const { -    return getGeneration() >= GFX9; +    return getGeneration() >= AMDGPUSubtarget::GFX9;    }    bool hasMin3Max3_16() const { -    return getGeneration() >= GFX9; -  } - -  bool hasMadMixInsts() const { -    return HasMadMixInsts; +    return getGeneration() >= AMDGPUSubtarget::GFX9;    }    bool hasFmaMixInsts() const { @@ -337,15 +473,7 @@ public:    }    bool hasCARRY() const { -    return (getGeneration() >= EVERGREEN); -  } - -  bool hasBORROW() const { -    return (getGeneration() >= EVERGREEN); -  } - -  bool hasCaymanISA() const { -    return CaymanISA; +    return true;    }    bool hasFMA() const { @@ -360,10 +488,6 @@ public:      return EnableHugePrivateBuffer;    } -  bool isPromoteAllocaEnabled() const { -    return EnablePromoteAlloca; -  } -    bool unsafeDSOffsetFoldingEnabled() const {      return EnableUnsafeDSOffsetFolding;    } @@ -377,20 +501,10 @@ public:    unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,                                             const Function &) const; -  /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if -  /// the given LDS memory size is the only constraint. -  unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; - -  unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; -    bool hasFP16Denormals() const {      return FP64FP16Denormals;    } -  bool hasFP32Denormals() const { -    return FP32Denormals; -  } -    bool hasFP64Denormals() const {      return FP64FP16Denormals;    } @@ -399,10 +513,6 @@ public:      return getGeneration() >= AMDGPUSubtarget::GFX9;    } -  bool hasFPExceptions() const { -    return FPExceptions; -  } -    bool enableDX10Clamp() const {      return DX10Clamp;    } @@ -444,7 +554,7 @@ public:    }    bool hasApertureRegs() const { -   return HasApertureRegs; +    return HasApertureRegs;    }    bool isTrapHandlerEnabled() const { @@ -510,14 +620,6 @@ public:      return getGeneration() >= SEA_ISLANDS;    } -  bool hasFminFmaxLegacy() const { -    return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; -  } - -  bool hasSDWA() const { -    return HasSDWA; -  } -    bool hasSDWAOmod() const {      return HasSDWAOmod;    } @@ -556,10 +658,6 @@ public:      return isAmdCodeObjectV2(F) ? 0 : 36;    } -  unsigned getAlignmentForImplicitArgPtr() const { -    return isAmdHsaOS() ? 8 : 4; -  } -    /// \returns Number of bytes of arguments that are passed to a shader or    /// kernel in addition to the explicit ones declared for the function.    unsigned getImplicitArgNumBytes(const Function &F) const { @@ -588,134 +686,39 @@ public:      return true;    } -  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;} -  bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;} +  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } +  bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }    /// \returns Number of execution units per compute unit supported by the    /// subtarget.    unsigned getEUsPerCU() const { -    return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits()); -  } - -  /// \returns Maximum number of work groups per compute unit supported by the -  /// subtarget and limited by given \p FlatWorkGroupSize. 
-  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { -    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(), -                                                  FlatWorkGroupSize); +    return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());    }    /// \returns Maximum number of waves per compute unit supported by the    /// subtarget without any kind of limitation.    unsigned getMaxWavesPerCU() const { -    return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits()); +    return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());    }    /// \returns Maximum number of waves per compute unit supported by the    /// subtarget and limited by given \p FlatWorkGroupSize.    unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const { -    return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(), +    return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),                                               FlatWorkGroupSize);    } -  /// \returns Minimum number of waves per execution unit supported by the -  /// subtarget. -  unsigned getMinWavesPerEU() const { -    return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits()); -  } -    /// \returns Maximum number of waves per execution unit supported by the    /// subtarget without any kind of limitation.    unsigned getMaxWavesPerEU() const { -    return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits()); -  } - -  /// \returns Maximum number of waves per execution unit supported by the -  /// subtarget and limited by given \p FlatWorkGroupSize. -  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const { -    return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(), -                                             FlatWorkGroupSize); -  } - -  /// \returns Minimum flat work group size supported by the subtarget. -  unsigned getMinFlatWorkGroupSize() const { -    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits()); -  } - -  /// \returns Maximum flat work group size supported by the subtarget. -  unsigned getMaxFlatWorkGroupSize() const { -    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits()); +    return AMDGPU::IsaInfo::getMaxWavesPerEU();    }    /// \returns Number of waves per work group supported by the subtarget and    /// limited by given \p FlatWorkGroupSize.    unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { -    return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(), -                                                 FlatWorkGroupSize); -  } - -  /// \returns Default range flat work group size for a calling convention. -  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; - -  /// \returns Subtarget's default pair of minimum/maximum flat work group sizes -  /// for function \p F, or minimum/maximum flat work group sizes explicitly -  /// requested using "amdgpu-flat-work-group-size" attribute attached to -  /// function \p F. -  /// -  /// \returns Subtarget's default values if explicitly requested values cannot -  /// be converted to integer, or violate subtarget's specifications. -  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const; - -  /// \returns Subtarget's default pair of minimum/maximum number of waves per -  /// execution unit for function \p F, or minimum/maximum number of waves per -  /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute -  /// attached to function \p F. 
-  /// -  /// \returns Subtarget's default values if explicitly requested values cannot -  /// be converted to integer, violate subtarget's specifications, or are not -  /// compatible with minimum/maximum number of waves limited by flat work group -  /// size, register usage, and/or lds usage. -  std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const; - -  /// Creates value range metadata on an workitemid.* inrinsic call or load. -  bool makeLIDRangeMetadata(Instruction *I) const; -}; - -class R600Subtarget final : public AMDGPUSubtarget { -private: -  R600InstrInfo InstrInfo; -  R600FrameLowering FrameLowering; -  R600TargetLowering TLInfo; - -public: -  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, -                const TargetMachine &TM); - -  const R600InstrInfo *getInstrInfo() const override { -    return &InstrInfo; -  } - -  const R600FrameLowering *getFrameLowering() const override { -    return &FrameLowering; -  } - -  const R600TargetLowering *getTargetLowering() const override { -    return &TLInfo; -  } - -  const R600RegisterInfo *getRegisterInfo() const override { -    return &InstrInfo.getRegisterInfo(); -  } - -  bool hasCFAluBug() const { -    return CFALUBug; -  } - -  bool hasVertexCache() const { -    return HasVertexCache; -  } - -  short getTexVTXClauseSize() const { -    return TexVTXClauseSize; +    return AMDGPU::IsaInfo::getWavesPerWorkGroup( +        MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);    }  }; @@ -766,6 +769,8 @@ public:    const SIRegisterInfo *getRegisterInfo() const override {      return &InstrInfo.getRegisterInfo();    } +  // static wrappers +  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);    // XXX - Why is this here if it isn't in the default pass set?    bool enableEarlyIfConversion() const override { @@ -775,7 +780,7 @@ public:    void overrideSchedPolicy(MachineSchedPolicy &Policy,                             unsigned NumRegionInstrs) const override; -  bool isVGPRSpillingEnabled(const Function& F) const; +  bool isVGPRSpillingEnabled(const Function &F) const;    unsigned getMaxNumUserSGPRs() const {      return 16; @@ -860,16 +865,18 @@ public:    unsigned getKernArgSegmentSize(const Function &F,                                   unsigned ExplictArgBytes) const; -  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs +  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs +  /// SGPRs    unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; -  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs +  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs +  /// VGPRs    unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;    /// \returns true if the flat_scratch register should be initialized with the    /// pointer to the wave's scratch memory rather than a size and offset.    bool flatScratchIsPointer() const { -    return getGeneration() >= GFX9; +    return getGeneration() >= AMDGPUSubtarget::GFX9;    }    /// \returns true if the machine has merged shaders in which s0-s7 are @@ -880,35 +887,39 @@ public:    /// \returns SGPR allocation granularity supported by the subtarget.    unsigned getSGPRAllocGranule() const { -    return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits()); +    return AMDGPU::IsaInfo::getSGPRAllocGranule( +        MCSubtargetInfo::getFeatureBits());    }    /// \returns SGPR encoding granularity supported by the subtarget.    
unsigned getSGPREncodingGranule() const { -    return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits()); +    return AMDGPU::IsaInfo::getSGPREncodingGranule( +        MCSubtargetInfo::getFeatureBits());    }    /// \returns Total number of SGPRs supported by the subtarget.    unsigned getTotalNumSGPRs() const { -    return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits()); +    return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());    }    /// \returns Addressable number of SGPRs supported by the subtarget.    unsigned getAddressableNumSGPRs() const { -    return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits()); +    return AMDGPU::IsaInfo::getAddressableNumSGPRs( +        MCSubtargetInfo::getFeatureBits());    }    /// \returns Minimum number of SGPRs that meets the given number of waves per    /// execution unit requirement supported by the subtarget.    unsigned getMinNumSGPRs(unsigned WavesPerEU) const { -    return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU); +    return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(), +                                           WavesPerEU);    }    /// \returns Maximum number of SGPRs that meets the given number of waves per    /// execution unit requirement supported by the subtarget.    unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { -    return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU, -                                           Addressable); +    return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(), +                                           WavesPerEU, Addressable);    }    /// \returns Reserved number of SGPRs for given function \p MF. @@ -926,34 +937,39 @@ public:    /// \returns VGPR allocation granularity supported by the subtarget.    unsigned getVGPRAllocGranule() const { -    return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits()); +    return AMDGPU::IsaInfo::getVGPRAllocGranule( +        MCSubtargetInfo::getFeatureBits());    }    /// \returns VGPR encoding granularity supported by the subtarget.    unsigned getVGPREncodingGranule() const { -    return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits()); +    return AMDGPU::IsaInfo::getVGPREncodingGranule( +        MCSubtargetInfo::getFeatureBits());    }    /// \returns Total number of VGPRs supported by the subtarget.    unsigned getTotalNumVGPRs() const { -    return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits()); +    return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());    }    /// \returns Addressable number of VGPRs supported by the subtarget.    unsigned getAddressableNumVGPRs() const { -    return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits()); +    return AMDGPU::IsaInfo::getAddressableNumVGPRs( +        MCSubtargetInfo::getFeatureBits());    }    /// \returns Minimum number of VGPRs that meets given number of waves per    /// execution unit requirement supported by the subtarget.    unsigned getMinNumVGPRs(unsigned WavesPerEU) const { -    return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU); +    return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(), +                                           WavesPerEU);    }    /// \returns Maximum number of VGPRs that meets given number of waves per    /// execution unit requirement supported by the subtarget.    
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { -    return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU); +    return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(), +                                           WavesPerEU);    }    /// \returns Maximum number of VGPRs that meets number of waves per execution @@ -971,6 +987,127 @@ public:        const override;  }; + +class R600Subtarget final : public R600GenSubtargetInfo, +                            public AMDGPUCommonSubtarget { +public: +  enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 }; + +private: +  R600InstrInfo InstrInfo; +  R600FrameLowering FrameLowering; +  bool FMA; +  bool CaymanISA; +  bool CFALUBug; +  bool DX10Clamp; +  bool HasVertexCache; +  bool R600ALUInst; +  bool FP64; +  short TexVTXClauseSize; +  Generation Gen; +  R600TargetLowering TLInfo; +  InstrItineraryData InstrItins; +  SelectionDAGTargetInfo TSInfo; +  AMDGPUAS AS; + +public: +  R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, +                const TargetMachine &TM); + +  const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; } + +  const R600FrameLowering *getFrameLowering() const override { +    return &FrameLowering; +  } + +  const R600TargetLowering *getTargetLowering() const override { +    return &TLInfo; +  } + +  const R600RegisterInfo *getRegisterInfo() const override { +    return &InstrInfo.getRegisterInfo(); +  } + +  const InstrItineraryData *getInstrItineraryData() const override { +    return &InstrItins; +  } + +  // Nothing implemented, just prevent crashes on use. +  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { +    return &TSInfo; +  } + +  void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + +  Generation getGeneration() const { +    return Gen; +  } + +  unsigned getStackAlignment() const { +    return 4; +  } + +  R600Subtarget &initializeSubtargetDependencies(const Triple &TT, +                                                 StringRef GPU, StringRef FS); + +  bool hasBFE() const { +    return (getGeneration() >= EVERGREEN); +  } + +  bool hasBFI() const { +    return (getGeneration() >= EVERGREEN); +  } + +  bool hasBCNT(unsigned Size) const { +    if (Size == 32) +      return (getGeneration() >= EVERGREEN); + +    return false; +  } + +  bool hasBORROW() const { +    return (getGeneration() >= EVERGREEN); +  } + +  bool hasCARRY() const { +    return (getGeneration() >= EVERGREEN); +  } + +  bool hasCaymanISA() const { +    return CaymanISA; +  } + +  bool hasFFBL() const { +    return (getGeneration() >= EVERGREEN); +  } + +  bool hasFFBH() const { +    return (getGeneration() >= EVERGREEN); +  } + +  bool hasFMA() const { return FMA; } + +  unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { +    return 36; +  } + +  bool hasCFAluBug() const { return CFALUBug; } + +  bool hasVertexCache() const { return HasVertexCache; } + +  short getTexVTXClauseSize() const { return TexVTXClauseSize; } + +  AMDGPUAS getAMDGPUAS() const { return AS; } + +  bool enableMachineScheduler() const override { +    return true; +  } + +  bool enableSubRegLiveness() const override { +    return true; +  } +}; +  } // end namespace llvm  #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 1bcc0ced2d8..4dcb1afd313 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -34,7 +34,6 @@ namespace llvm {  class AMDGPUTargetMachine : public LLVMTargetMachine {  protected:    std::unique_ptr<TargetLoweringObjectFile> TLOF; -  AMDGPUIntrinsicInfo IntrinsicInfo;    AMDGPUAS AS;    StringRef getGPUName(const Function &F) const; @@ -49,12 +48,8 @@ public:                        CodeGenOpt::Level OL);    ~AMDGPUTargetMachine() override; -  const AMDGPUSubtarget *getSubtargetImpl() const; -  const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override = 0; - -  const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override { -    return &IntrinsicInfo; -  } +  const TargetSubtargetInfo *getSubtargetImpl() const; +  const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override = 0;    TargetLoweringObjectFile *getObjFileLowering() const override {      return TLOF.get(); @@ -103,6 +98,7 @@ public:  class GCNTargetMachine final : public AMDGPUTargetMachine {  private: +  AMDGPUIntrinsicInfo IntrinsicInfo;    mutable StringMap<std::unique_ptr<SISubtarget>> SubtargetMap;  public: @@ -117,6 +113,10 @@ public:    TargetTransformInfo getTargetTransformInfo(const Function &F) override; +  const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override { +    return &IntrinsicInfo; +  } +    bool useIPRA() const override {      return true;    } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 7d24b7077bc..a68b8d03f06 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -102,7 +102,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,    unsigned ThresholdPrivate = UnrollThresholdPrivate;    unsigned ThresholdLocal = UnrollThresholdLocal;    unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal); -  AMDGPUAS ASST = ST->getAMDGPUAS(); +  const AMDGPUAS &ASST = AMDGPU::getAMDGPUAS(TargetTriple);    for (const BasicBlock *BB : L->getBlocks()) {      const DataLayout &DL = BB->getModule()->getDataLayout();      unsigned LocalGEPsSeen = 0; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 495229029fe..b15348d404a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -45,17 +45,12 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {    friend BaseT; -  const AMDGPUSubtarget *ST; -  const AMDGPUTargetLowering *TLI; +  Triple TargetTriple;  public:    explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)      : BaseT(TM, F.getParent()->getDataLayout()), -      ST(TM->getSubtargetImpl(F)), -      TLI(ST->getTargetLowering()) {} - -  const AMDGPUSubtarget *getST() const { return ST; } -  const AMDGPUTargetLowering *getTLI() const { return TLI; } +      TargetTriple(TM->getTargetTriple()) {}    void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,                                 TTI::UnrollingPreferences &UP); @@ -123,7 +118,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {  public:    explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)      : BaseT(TM, F.getParent()->getDataLayout()), -      ST(TM->getSubtargetImpl(F)), +      ST(static_cast<const AMDGPUSubtarget*>(TM->getSubtargetImpl(F))),        TLI(ST->getTargetLowering()),        CommonTTI(TM, F),        IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) 
{} @@ -211,18 +206,18 @@ class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {    friend BaseT; -  const AMDGPUSubtarget *ST; +  const R600Subtarget *ST;    const AMDGPUTargetLowering *TLI;    AMDGPUTTIImpl CommonTTI;  public:    explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)      : BaseT(TM, F.getParent()->getDataLayout()), -      ST(TM->getSubtargetImpl(F)), +      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),        TLI(ST->getTargetLowering()),        CommonTTI(TM, F)	{} -  const AMDGPUSubtarget *getST() const { return ST; } +  const R600Subtarget *getST() const { return ST; }    const AMDGPUTargetLowering *getTLI() const { return TLI; }    void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp index 78570928586..11cd49e5b3d 100644 --- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -432,19 +432,19 @@ void AMDGPUCFGStructurizer::reversePredicateSetter(    for (;; --I) {      if (I == MBB.end())        continue; -    if (I->getOpcode() == AMDGPU::PRED_X) { +    if (I->getOpcode() == R600::PRED_X) {        switch (I->getOperand(2).getImm()) { -      case AMDGPU::PRED_SETE_INT: -        I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT); +      case R600::PRED_SETE_INT: +        I->getOperand(2).setImm(R600::PRED_SETNE_INT);          return; -      case AMDGPU::PRED_SETNE_INT: -        I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT); +      case R600::PRED_SETNE_INT: +        I->getOperand(2).setImm(R600::PRED_SETE_INT);          return; -      case AMDGPU::PRED_SETE: -        I->getOperand(2).setImm(AMDGPU::PRED_SETNE); +      case R600::PRED_SETE: +        I->getOperand(2).setImm(R600::PRED_SETNE);          return; -      case AMDGPU::PRED_SETNE: -        I->getOperand(2).setImm(AMDGPU::PRED_SETE); +      case R600::PRED_SETNE: +        I->getOperand(2).setImm(R600::PRED_SETE);          return;        default:          llvm_unreachable("PRED_X Opcode invalid!"); @@ -513,10 +513,10 @@ void AMDGPUCFGStructurizer::insertCondBranchBefore(  int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {    switch(OldOpcode) { -  case AMDGPU::JUMP_COND: -  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; -  case AMDGPU::BRANCH_COND_i32: -  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; +  case R600::JUMP_COND: +  case R600::JUMP: return R600::IF_PREDICATE_SET; +  case R600::BRANCH_COND_i32: +  case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32;    default: llvm_unreachable("internal error");    }    return -1; @@ -524,10 +524,10 @@ int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {  int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {    switch(OldOpcode) { -  case AMDGPU::JUMP_COND: -  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; -  case AMDGPU::BRANCH_COND_i32: -  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; +  case R600::JUMP_COND: +  case R600::JUMP: return R600::IF_PREDICATE_SET; +  case R600::BRANCH_COND_i32: +  case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32;    default: llvm_unreachable("internal error");    }    return -1; @@ -535,8 +535,8 @@ int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {  int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {    switch(OldOpcode) { -  case AMDGPU::JUMP_COND: -  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; 
+  case R600::JUMP_COND: +  case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32;    default: llvm_unreachable("internal error");    }    return -1; @@ -544,8 +544,8 @@ int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {  int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {    switch(OldOpcode) { -  case AMDGPU::JUMP_COND: -  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; +  case R600::JUMP_COND: +  case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32;    default: llvm_unreachable("internal error");    }    return -1; @@ -573,9 +573,9 @@ AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,  bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {    switch (MI->getOpcode()) { -    case AMDGPU::JUMP_COND: -    case AMDGPU::BRANCH_COND_i32: -    case AMDGPU::BRANCH_COND_f32: return true; +    case R600::JUMP_COND: +    case R600::BRANCH_COND_i32: +    case R600::BRANCH_COND_f32: return true;    default:      return false;    } @@ -584,8 +584,8 @@ bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {  bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {    switch (MI->getOpcode()) { -  case AMDGPU::JUMP: -  case AMDGPU::BRANCH: +  case R600::JUMP: +  case R600::BRANCH:      return true;    default:      return false; @@ -634,7 +634,7 @@ MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {    MachineBasicBlock::reverse_iterator It = MBB->rbegin();    if (It != MBB->rend()) {      MachineInstr *instr = &(*It); -    if (instr->getOpcode() == AMDGPU::RETURN) +    if (instr->getOpcode() == R600::RETURN)        return instr;    }    return nullptr; @@ -687,8 +687,8 @@ void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {     MachineBasicBlock::iterator E = MBB->end();     MachineBasicBlock::iterator It = Pre;     while (It != E) { -     if (Pre->getOpcode() == AMDGPU::CONTINUE -         && It->getOpcode() == AMDGPU::ENDLOOP) +     if (Pre->getOpcode() == R600::CONTINUE +         && It->getOpcode() == R600::ENDLOOP)         ContInstr.push_back(&*Pre);       Pre = It;       ++It; @@ -1303,15 +1303,15 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,    bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); -  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" -  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF); +  //insert R600::ENDIF to avoid special case "input landBlk == NULL" +  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF);    if (LandBlkHasOtherPred) {      report_fatal_error("Extra register needed to handle CFG");      unsigned CmpResReg =        HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);      report_fatal_error("Extra compare instruction needed to handle CFG"); -    insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, +    insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET,          CmpResReg, DebugLoc());    } @@ -1319,7 +1319,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,    // cause an assertion failure in the PostRA scheduling pass.    
unsigned InitReg =      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); -  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg, +  insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg,        DebugLoc());    if (MigrateTrue) { @@ -1329,7 +1329,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,      // (initVal != 1).      report_fatal_error("Extra register needed to handle CFG");    } -  insertInstrBefore(I, AMDGPU::ELSE); +  insertInstrBefore(I, R600::ELSE);    if (MigrateFalse) {      migrateInstruction(FalseMBB, LandBlk, I); @@ -1341,7 +1341,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,    if (LandBlkHasOtherPred) {      // add endif -    insertInstrBefore(I, AMDGPU::ENDIF); +    insertInstrBefore(I, R600::ENDIF);      // put initReg = 2 to other predecessors of landBlk      for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), @@ -1414,7 +1414,7 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,    }    if (FalseMBB) { -    insertInstrBefore(I, AMDGPU::ELSE); +    insertInstrBefore(I, R600::ELSE);      MBB->splice(I, FalseMBB, FalseMBB->begin(),                     FalseMBB->end());      MBB->removeSuccessor(FalseMBB, true); @@ -1423,7 +1423,7 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,      retireBlock(FalseMBB);      MLI->removeBlock(FalseMBB);    } -  insertInstrBefore(I, AMDGPU::ENDIF); +  insertInstrBefore(I, R600::ENDIF);    BranchMI->eraseFromParent(); @@ -1436,8 +1436,8 @@ void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,    LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()                      << " land = BB" << LandMBB->getNumber() << "\n";); -  insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc()); -  insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc()); +  insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc()); +  insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc());    DstBlk->replaceSuccessor(DstBlk, LandMBB);  } @@ -1453,9 +1453,9 @@ void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,    MachineBasicBlock::iterator I = BranchMI;    if (TrueBranch != LandMBB)      reversePredicateSetter(I, *I->getParent()); -  insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL); -  insertInstrBefore(I, AMDGPU::BREAK); -  insertInstrBefore(I, AMDGPU::ENDIF); +  insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL); +  insertInstrBefore(I, R600::BREAK); +  insertInstrBefore(I, R600::ENDIF);    //now branchInst can be erase safely    BranchMI->eraseFromParent();    //now take care of successors, retire blocks @@ -1484,8 +1484,8 @@ void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,            getBranchZeroOpcode(OldOpcode);        insertCondBranchBefore(I, BranchOpcode, DL);        // insertEnd to ensure phi-moves, if exist, go before the continue-instr. -      insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL); -      insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL); +      insertInstrEnd(ContingMBB, R600::CONTINUE, DL); +      insertInstrEnd(ContingMBB, R600::ENDIF, DL);      } else {        int BranchOpcode =            TrueBranch == ContMBB ? 
getContinueNzeroOpcode(OldOpcode) : @@ -1500,7 +1500,7 @@ void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,      // location we've just inserted that reference here so it should be      // representative insertEnd to ensure phi-moves, if exist, go before the      // continue-instr. -    insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, +    insertInstrEnd(ContingMBB, R600::CONTINUE,          getLastDebugLocInBB(ContingMBB));    }  } @@ -1627,7 +1627,7 @@ void AMDGPUCFGStructurizer::addDummyExitBlock(      SmallVectorImpl<MachineBasicBlock*> &RetMBB) {    MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();    FuncRep->push_back(DummyExitBlk);  //insert to function -  insertInstrEnd(DummyExitBlk, AMDGPU::RETURN); +  insertInstrEnd(DummyExitBlk, R600::RETURN);    for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),         E = RetMBB.end(); It != E; ++It) { diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index f2ea3a14d50..5c1e3c952f1 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -4,7 +4,6 @@ tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher)  tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)  tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)  tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel) -tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)  tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler)  tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)  tablegen(LLVM AMDGPUGenIntrinsicEnums.inc -gen-tgt-intrinsic-enums) @@ -19,6 +18,16 @@ tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)  set(LLVM_TARGET_DEFINITIONS AMDGPUGISel.td)  tablegen(LLVM AMDGPUGenGlobalISel.inc -gen-global-isel) +set(LLVM_TARGET_DEFINITIONS R600.td) +tablegen(LLVM R600GenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM R600GenCallingConv.inc -gen-callingconv) +tablegen(LLVM R600GenDAGISel.inc -gen-dag-isel) +tablegen(LLVM R600GenDFAPacketizer.inc -gen-dfa-packetizer) +tablegen(LLVM R600GenInstrInfo.inc -gen-instr-info) +tablegen(LLVM R600GenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM R600GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM R600GenSubtargetInfo.inc -gen-subtarget) +  add_public_tablegen_target(AMDGPUCommonTableGen)  add_llvm_target(AMDGPUCodeGen diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index d7908f6902b..f3de903f21b 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -20,6 +20,7 @@  #include "Disassembler/AMDGPUDisassembler.h"  #include "AMDGPU.h"  #include "AMDGPURegisterInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h"  #include "SIDefines.h"  #include "MCTargetDesc/AMDGPUMCTargetDesc.h"  #include "Utils/AMDGPUBaseInfo.h" diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td index 148b45ba6bb..b5a657d55a9 100644 --- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td @@ -14,14 +14,13 @@  //===----------------------------------------------------------------------===//  def isEG : Predicate< -  "Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && " -  "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && " +  "Subtarget->getGeneration() >= R600Subtarget::EVERGREEN && "    
"!Subtarget->hasCaymanISA()"  >;  def isEGorCayman : Predicate< -  "Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||" -  "Subtarget->getGeneration() == AMDGPUSubtarget::NORTHERN_ISLANDS" +  "Subtarget->getGeneration() == R600Subtarget::EVERGREEN ||" +  "Subtarget->getGeneration() == R600Subtarget::NORTHERN_ISLANDS"  >;  class EGPat<dag pattern, dag result> : AMDGPUPat<pattern, result> { diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index 51f926d0a8e..db908368a17 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -510,11 +510,6 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,  void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,                                       const MCSubtargetInfo &STI,                                       raw_ostream &O) { -  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN]) { -    static_cast<R600InstPrinter*>(this)->printOperand(MI, OpNo, O); -    return; -  } -    if (OpNo >= MI->getNumOperands()) {      O << "/*Missing OP" << OpNo << "*/";      return; @@ -965,11 +960,6 @@ void AMDGPUInstPrinter::printVGPRIndexMode(const MCInst *MI, unsigned OpNo,  void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,                                          const MCSubtargetInfo &STI,                                          raw_ostream &O) { -  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN]) { -    static_cast<R600InstPrinter*>(this)->printMemOperand(MI, OpNo, O); -    return; -  } -    printOperand(MI, OpNo, STI, O);    O  << ", ";    printOperand(MI, OpNo + 1, STI, O); @@ -995,16 +985,6 @@ void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,      O << Asm;  } -void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo, -                                 const MCSubtargetInfo &STI, raw_ostream &O) { -  static_cast<R600InstPrinter*>(this)->printAbs(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo, -                                   const MCSubtargetInfo &STI, raw_ostream &O) { -  static_cast<R600InstPrinter*>(this)->printClamp(MI, OpNo, O); -} -  void AMDGPUInstPrinter::printHigh(const MCInst *MI, unsigned OpNo,                                    const MCSubtargetInfo &STI,                                    raw_ostream &O) { @@ -1031,70 +1011,6 @@ void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,      O << " div:2";  } -void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo, -                                     const MCSubtargetInfo &STI, -                                     raw_ostream &O) { -  static_cast<R600InstPrinter*>(this)->printLiteral(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo, -                                  const MCSubtargetInfo &STI, raw_ostream &O) { -  static_cast<R600InstPrinter*>(this)->printLast(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo, -                                 const MCSubtargetInfo &STI, raw_ostream &O) { -  static_cast<R600InstPrinter*>(this)->printNeg(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo, -                                  const MCSubtargetInfo &STI, raw_ostream &O) { -  static_cast<R600InstPrinter*>(this)->printOMOD(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo, 
-                                 const MCSubtargetInfo &STI, raw_ostream &O) { -  static_cast<R600InstPrinter*>(this)->printRel(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo, -                                            const MCSubtargetInfo &STI, -                                            raw_ostream &O) { -  static_cast<R600InstPrinter*>(this)->printUpdateExecMask(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo, -                                        const MCSubtargetInfo &STI, -                                        raw_ostream &O) { -  static_cast<R600InstPrinter*>(this)->printUpdatePred(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo, -                                   const MCSubtargetInfo &STI, raw_ostream &O) { -  static_cast<R600InstPrinter*>(this)->printWrite(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo, -                                         const MCSubtargetInfo &STI, -                                         raw_ostream &O) { -  static_cast<R600InstPrinter*>(this)->printBankSwizzle(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo, -                                  const MCSubtargetInfo &STI, raw_ostream &O) { -  static_cast<R600InstPrinter*>(this)->printRSel(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo, -                                const MCSubtargetInfo &STI, raw_ostream &O) { -  static_cast<R600InstPrinter*>(this)->printCT(MI, OpNo, O); -} - -void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo, -                                    const MCSubtargetInfo &STI, raw_ostream &O) { -  static_cast<R600InstPrinter*>(this)->printKCache(MI, OpNo, O); -} -  void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,                                       const MCSubtargetInfo &STI,                                       raw_ostream &O) { @@ -1299,6 +1215,13 @@ void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,  #include "AMDGPUGenAsmWriter.inc" +void R600InstPrinter::printInst(const MCInst *MI, raw_ostream &O, +		                StringRef Annot, const MCSubtargetInfo &STI) { +  O.flush(); +  printInstruction(MI, O); +  printAnnotation(O, Annot); +} +  void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo,                                 raw_ostream &O) {    AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '|'); @@ -1417,7 +1340,7 @@ void R600InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,    if (Op.isReg()) {      switch (Op.getReg()) {      // This is the default predicate state, so we don't need to print it. -    case AMDGPU::PRED_SEL_OFF: +    case R600::PRED_SEL_OFF:        break;      default: @@ -1493,3 +1416,5 @@ void R600InstPrinter::printWrite(const MCInst *MI, unsigned OpNo,      O << " (MASKED)";    }  } + +#include "R600GenAsmWriter.inc" diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h index 1bce0bbd5dc..11a496a38b2 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -218,13 +218,16 @@ protected:                    raw_ostream &O);  }; -// FIXME: R600 specific parts of AMDGPUInstrPrinter should be moved here, and -// MCTargetDesc should be using R600InstPrinter for the R600 target. 
-class R600InstPrinter : public AMDGPUInstPrinter { +class R600InstPrinter : public MCInstPrinter {  public:    R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,                    const MCRegisterInfo &MRI) -    : AMDGPUInstPrinter(MAI, MII, MRI) {} +    : MCInstPrinter(MAI, MII, MRI) {} + +  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, +                 const MCSubtargetInfo &STI) override; +  void printInstruction(const MCInst *MI, raw_ostream &O); +  static const char *getRegisterName(unsigned RegNo);    void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);    void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index a7878cb42a7..c579c7d60e1 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -38,9 +38,17 @@ using namespace llvm;  #define GET_SUBTARGETINFO_MC_DESC  #include "AMDGPUGenSubtargetInfo.inc" +#define NoSchedModel NoSchedModelR600 +#define GET_SUBTARGETINFO_MC_DESC +#include "R600GenSubtargetInfo.inc" +#undef NoSchedModelR600 +  #define GET_REGINFO_MC_DESC  #include "AMDGPUGenRegisterInfo.inc" +#define GET_REGINFO_MC_DESC +#include "R600GenRegisterInfo.inc" +  static MCInstrInfo *createAMDGPUMCInstrInfo() {    MCInstrInfo *X = new MCInstrInfo();    InitAMDGPUMCInstrInfo(X); @@ -49,12 +57,17 @@ static MCInstrInfo *createAMDGPUMCInstrInfo() {  static MCRegisterInfo *createAMDGPUMCRegisterInfo(const Triple &TT) {    MCRegisterInfo *X = new MCRegisterInfo(); -  InitAMDGPUMCRegisterInfo(X, 0); +  if (TT.getArch() == Triple::r600) +    InitR600MCRegisterInfo(X, 0); +  else +    InitAMDGPUMCRegisterInfo(X, 0);    return X;  }  static MCSubtargetInfo *  createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { +  if (TT.getArch() == Triple::r600) +    return createR600MCSubtargetInfoImpl(TT, CPU, FS);    return createAMDGPUMCSubtargetInfoImpl(TT, CPU, FS);  } @@ -63,8 +76,10 @@ static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,                                                  const MCAsmInfo &MAI,                                                  const MCInstrInfo &MII,                                                  const MCRegisterInfo &MRI) { -  return T.getArch() == Triple::r600 ? 
new R600InstPrinter(MAI, MII, MRI) :  -                                       new AMDGPUInstPrinter(MAI, MII, MRI); +  if (T.getArch() == Triple::r600) +    return new R600InstPrinter(MAI, MII, MRI); +  else +    return new AMDGPUInstPrinter(MAI, MII, MRI);  }  static MCTargetStreamer *createAMDGPUAsmTargetStreamer(MCStreamer &S, @@ -90,10 +105,12 @@ static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,  }  extern "C" void LLVMInitializeAMDGPUTargetMC() { + +  TargetRegistry::RegisterMCInstrInfo(getTheGCNTarget(), createAMDGPUMCInstrInfo); +  TargetRegistry::RegisterMCInstrInfo(getTheAMDGPUTarget(), createR600MCInstrInfo);    for (Target *T : {&getTheAMDGPUTarget(), &getTheGCNTarget()}) {      RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T); -    TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo);      TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);      TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);      TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h index a2e3c99bb58..f3628d96d6e 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -40,6 +40,7 @@ Target &getTheGCNTarget();  MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,                                         const MCRegisterInfo &MRI,                                         MCContext &Ctx); +MCInstrInfo *createR600MCInstrInfo();  MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,                                       const MCRegisterInfo &MRI, @@ -59,6 +60,10 @@ createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,  #include "AMDGPUGenRegisterInfo.inc"  #undef GET_REGINFO_ENUM +#define GET_REGINFO_ENUM +#include "R600GenRegisterInfo.inc" +#undef GET_REGINFO_ENUM +  #define GET_INSTRINFO_ENUM  #define GET_INSTRINFO_OPERAND_ENUM  #define GET_INSTRINFO_SCHED_ENUM @@ -67,9 +72,20 @@ createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,  #undef GET_INSTRINFO_OPERAND_ENUM  #undef GET_INSTRINFO_ENUM +#define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_OPERAND_ENUM +#define GET_INSTRINFO_SCHED_ENUM +#include "R600GenInstrInfo.inc" +#undef GET_INSTRINFO_SCHED_ENUM +#undef GET_INSTRINFO_OPERAND_ENUM +#undef GET_INSTRINFO_ENUM  #define GET_SUBTARGETINFO_ENUM  #include "AMDGPUGenSubtargetInfo.inc"  #undef GET_SUBTARGETINFO_ENUM +#define GET_SUBTARGETINFO_ENUM +#include "R600GenSubtargetInfo.inc" +#undef GET_SUBTARGETINFO_ENUM +  #endif diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt index f9cb4678dc5..d63fb96f9d0 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt @@ -8,5 +8,6 @@ add_llvm_library(LLVMAMDGPUDesc    AMDGPUMCTargetDesc.cpp    AMDGPUTargetStreamer.cpp    R600MCCodeEmitter.cpp +  R600MCTargetDesc.cpp    SIMCCodeEmitter.cpp  ) diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 3968a294cb1..28d4bc1829e 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -15,7 +15,6 @@  //===----------------------------------------------------------------------===//  #include "MCTargetDesc/AMDGPUFixupKinds.h" -#include 
"MCTargetDesc/AMDGPUMCCodeEmitter.h"  #include "MCTargetDesc/AMDGPUMCTargetDesc.h"  #include "R600Defines.h"  #include "llvm/MC/MCCodeEmitter.h" @@ -36,30 +35,40 @@ using namespace llvm;  namespace { -class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { +class R600MCCodeEmitter : public MCCodeEmitter {    const MCRegisterInfo &MRI; +  const MCInstrInfo &MCII;  public:    R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri) -    : AMDGPUMCCodeEmitter(mcii), MRI(mri) {} +    : MRI(mri), MCII(mcii) {}    R600MCCodeEmitter(const R600MCCodeEmitter &) = delete;    R600MCCodeEmitter &operator=(const R600MCCodeEmitter &) = delete;    /// Encode the instruction and write it to the OS.    void encodeInstruction(const MCInst &MI, raw_ostream &OS,                           SmallVectorImpl<MCFixup> &Fixups, -                         const MCSubtargetInfo &STI) const override; +                         const MCSubtargetInfo &STI) const;    /// \returns the encoding for an MCOperand.    uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,                               SmallVectorImpl<MCFixup> &Fixups, -                             const MCSubtargetInfo &STI) const override; +                             const MCSubtargetInfo &STI) const;  private: +    void Emit(uint32_t value, raw_ostream &OS) const;    void Emit(uint64_t value, raw_ostream &OS) const;    unsigned getHWReg(unsigned regNo) const; + +  uint64_t getBinaryCodeForInstr(const MCInst &MI, +                                 SmallVectorImpl<MCFixup> &Fixups, +                                 const MCSubtargetInfo &STI) const; +  uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; +  void verifyInstructionPredicates(const MCInst &MI, +                                   uint64_t AvailableFeatures) const; +  };  } // end anonymous namespace @@ -94,16 +103,16 @@ void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,                                computeAvailableFeatures(STI.getFeatureBits()));    const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); -  if (MI.getOpcode() == AMDGPU::RETURN || -    MI.getOpcode() == AMDGPU::FETCH_CLAUSE || -    MI.getOpcode() == AMDGPU::ALU_CLAUSE || -    MI.getOpcode() == AMDGPU::BUNDLE || -    MI.getOpcode() == AMDGPU::KILL) { +  if (MI.getOpcode() == R600::RETURN || +    MI.getOpcode() == R600::FETCH_CLAUSE || +    MI.getOpcode() == R600::ALU_CLAUSE || +    MI.getOpcode() == R600::BUNDLE || +    MI.getOpcode() == R600::KILL) {      return;    } else if (IS_VTX(Desc)) {      uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups, STI);      uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset -    if (!(STI.getFeatureBits()[AMDGPU::FeatureCaymanISA])) { +    if (!(STI.getFeatureBits()[R600::FeatureCaymanISA])) {        InstWord2 |= 1 << 19; // Mega-Fetch bit      } @@ -136,7 +145,7 @@ void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,        Emit((uint32_t) 0, OS);    } else {      uint64_t Inst = getBinaryCodeForInstr(MI, Fixups, STI); -    if ((STI.getFeatureBits()[AMDGPU::FeatureR600ALUInst]) && +    if ((STI.getFeatureBits()[R600::FeatureR600ALUInst]) &&         ((Desc.TSFlags & R600_InstFlag::OP1) ||           Desc.TSFlags & R600_InstFlag::OP2)) {        uint64_t ISAOpCode = Inst & (0x3FFULL << 39); @@ -186,4 +195,4 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,  }  #define ENABLE_INSTR_PREDICATE_VERIFIER -#include "AMDGPUGenMCCodeEmitter.inc" +#include "R600GenMCCodeEmitter.inc" diff --git 
a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp new file mode 100644 index 00000000000..1c99a708e5a --- /dev/null +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp @@ -0,0 +1,27 @@ +//===-- R600MCTargetDesc.cpp - R600 Target Descriptions -------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief This file provides R600 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUMCTargetDesc.h" +#include "llvm/MC/MCInstrInfo.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#include "R600GenInstrInfo.inc" + +MCInstrInfo *llvm::createR600MCInstrInfo() { +  MCInstrInfo *X = new MCInstrInfo(); +  InitR600MCInstrInfo(X); +  return X; +} diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index e3e23d73f41..36913bd0427 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -438,3 +438,6 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,    llvm_unreachable("Encoding of this operand type is not supported yet.");    return 0;  } + +#define ENABLE_INSTR_PREDICATE_VERIFIER +#include "AMDGPUGenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/AMDGPU/R600.td b/llvm/lib/Target/AMDGPU/R600.td new file mode 100644 index 00000000000..ff96928211c --- /dev/null +++ b/llvm/lib/Target/AMDGPU/R600.td @@ -0,0 +1,59 @@ +//===-- R600.td - R600 Tablegen files ----------------------*- tablegen -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +def R600InstrInfo : InstrInfo { +  let guessInstructionProperties = 1; +  let noNamedPositionallyEncodedOperands = 1; +} + +def R600 : Target { +  let InstructionSet = R600InstrInfo; +  let AllowRegisterRenaming = 1; +} + +let Namespace = "R600" in { + +foreach Index = 0-15 in { +  def sub#Index : SubRegIndex<32, !shl(Index, 5)>; +} + +include "R600RegisterInfo.td" + +} + +def NullALU : InstrItinClass; +def ALU_NULL : FuncUnit; + +include "AMDGPUFeatures.td" +include "R600Schedule.td" +include "R600Processors.td" +include "AMDGPUInstrInfo.td" +include "AMDGPUInstructions.td" +include "R600Instructions.td" +include "R700Instructions.td" +include "EvergreenInstructions.td" +include "CaymanInstructions.td" + +// Calling convention for R600 +def CC_R600 : CallingConv<[ +  CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[ +    T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW, +    T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW, +    T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW, +    T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW, +    T30_XYZW, T31_XYZW, T32_XYZW +  ]>>> +]>; + +// Calling convention for compute kernels +def CC_R600_Kernel : CallingConv<[ +  CCCustom<"allocateKernArg"> +]>; diff --git a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp index ea247a07329..25fdc430025 100644 --- a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp @@ -51,7 +51,7 @@ void R600AsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {    for (const MachineBasicBlock &MBB : MF) {      for (const MachineInstr &MI : MBB) { -      if (MI.getOpcode() == AMDGPU::KILLGT) +      if (MI.getOpcode() == R600::KILLGT)          killPixel = true;        unsigned numOperands = MI.getNumOperands();        for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { diff --git a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp index 49c4d2223a0..0c62d6a4b3d 100644 --- a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp +++ b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp @@ -34,8 +34,8 @@ namespace {  static bool isCFAlu(const MachineInstr &MI) {    switch (MI.getOpcode()) { -  case AMDGPU::CF_ALU: -  case AMDGPU::CF_ALU_PUSH_BEFORE: +  case R600::CF_ALU: +  case R600::CF_ALU_PUSH_BEFORE:      return true;    default:      return false; @@ -85,20 +85,20 @@ char &llvm::R600ClauseMergePassID = R600ClauseMergePass::ID;  unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {    assert(isCFAlu(MI));    return MI -      .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT)) +      .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::COUNT))        .getImm();  }  bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {    assert(isCFAlu(MI));    return MI -      .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled)) +      .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::Enabled))        .getImm();  }  void R600ClauseMergePass::cleanPotentialDisabledCFAlu(      MachineInstr &CFAlu) const { -  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); +  int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);    MachineBasicBlock::iterator I = CFAlu, E = 
CFAlu.getParent()->end();    I++;    do { @@ -117,7 +117,7 @@ void R600ClauseMergePass::cleanPotentialDisabledCFAlu(  bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,                                            const MachineInstr &LatrCFAlu) const {    assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu)); -  int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); +  int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);    unsigned RootInstCount = getCFAluSize(RootCFAlu),        LaterInstCount = getCFAluSize(LatrCFAlu);    unsigned CumuledInsts = RootInstCount + LaterInstCount; @@ -125,15 +125,15 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,      LLVM_DEBUG(dbgs() << "Excess inst counts\n");      return false;    } -  if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) +  if (RootCFAlu.getOpcode() == R600::CF_ALU_PUSH_BEFORE)      return false;    // Is KCache Bank 0 compatible ?    int Mode0Idx = -      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0); +      TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE0);    int KBank0Idx = -      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0); +      TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK0);    int KBank0LineIdx = -      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0); +      TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR0);    if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&        RootCFAlu.getOperand(Mode0Idx).getImm() &&        (LatrCFAlu.getOperand(KBank0Idx).getImm() != @@ -145,11 +145,11 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,    }    // Is KCache Bank 1 compatible ?    int Mode1Idx = -      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1); +      TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE1);    int KBank1Idx = -      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1); +      TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK1);    int KBank1LineIdx = -      TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1); +      TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR1);    if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&        RootCFAlu.getOperand(Mode1Idx).getImm() &&        (LatrCFAlu.getOperand(KBank1Idx).getImm() != diff --git a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index f585eaa3c9d..f447fc6576c 100644 --- a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -94,7 +94,7 @@ bool CFStack::branchStackContains(CFStack::StackItem Item) {  }  bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { -  if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() && +  if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&        getLoopDepth() > 1)      return true; @@ -103,10 +103,10 @@ bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {    switch(Opcode) {    default: return false; -  case AMDGPU::CF_ALU_PUSH_BEFORE: -  case AMDGPU::CF_ALU_ELSE_AFTER: -  case AMDGPU::CF_ALU_BREAK: -  case AMDGPU::CF_ALU_CONTINUE: +  case R600::CF_ALU_PUSH_BEFORE: +  case R600::CF_ALU_ELSE_AFTER: +  case R600::CF_ALU_BREAK: +  case R600::CF_ALU_CONTINUE:      if (CurrentSubEntries == 0)        return false;      if (ST->getWavefrontSize() == 64) { @@ -168,8 +168,8 @@ void CFStack::updateMaxStackSize() {  void CFStack::pushBranch(unsigned Opcode, bool 
isWQM) {    CFStack::StackItem Item = CFStack::ENTRY;    switch(Opcode) { -  case AMDGPU::CF_PUSH_EG: -  case AMDGPU::CF_ALU_PUSH_BEFORE: +  case R600::CF_PUSH_EG: +  case R600::CF_ALU_PUSH_BEFORE:      if (!isWQM) {        if (!ST->hasCaymanISA() &&            !branchStackContains(CFStack::FIRST_NON_WQM_PUSH)) @@ -240,8 +240,8 @@ private:    bool IsTrivialInst(MachineInstr &MI) const {      switch (MI.getOpcode()) { -    case AMDGPU::KILL: -    case AMDGPU::RETURN: +    case R600::KILL: +    case R600::RETURN:        return true;      default:        return false; @@ -253,41 +253,41 @@ private:      bool isEg = (ST->getGeneration() >= R600Subtarget::EVERGREEN);      switch (CFI) {      case CF_TC: -      Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600; +      Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;        break;      case CF_VC: -      Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600; +      Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;        break;      case CF_CALL_FS: -      Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600; +      Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;        break;      case CF_WHILE_LOOP: -      Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600; +      Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;        break;      case CF_END_LOOP: -      Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600; +      Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;        break;      case CF_LOOP_BREAK: -      Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600; +      Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;        break;      case CF_LOOP_CONTINUE: -      Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600; +      Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;        break;      case CF_JUMP: -      Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600; +      Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;        break;      case CF_ELSE: -      Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600; +      Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;        break;      case CF_POP: -      Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600; +      Opcode = isEg ? R600::POP_EG : R600::POP_R600;        break;      case CF_END:        if (ST->hasCaymanISA()) { -        Opcode = AMDGPU::CF_END_CM; +        Opcode = R600::CF_END_CM;          break;        } -      Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600; +      Opcode = isEg ? 
R600::CF_END_EG : R600::CF_END_R600;        break;      }      assert (Opcode && "No opcode selected"); @@ -305,21 +305,21 @@ private:          continue;        if (MO.isDef()) {          unsigned Reg = MO.getReg(); -        if (AMDGPU::R600_Reg128RegClass.contains(Reg)) +        if (R600::R600_Reg128RegClass.contains(Reg))            DstMI = Reg;          else            DstMI = TRI->getMatchingSuperReg(Reg,                AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), -              &AMDGPU::R600_Reg128RegClass); +              &R600::R600_Reg128RegClass);        }        if (MO.isUse()) {          unsigned Reg = MO.getReg(); -        if (AMDGPU::R600_Reg128RegClass.contains(Reg)) +        if (R600::R600_Reg128RegClass.contains(Reg))            SrcMI = Reg;          else            SrcMI = TRI->getMatchingSuperReg(Reg,                AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), -              &AMDGPU::R600_Reg128RegClass); +              &R600::R600_Reg128RegClass);        }      }      if ((DstRegs.find(SrcMI) == DstRegs.end())) { @@ -359,15 +359,15 @@ private:    void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {      static const unsigned LiteralRegs[] = { -      AMDGPU::ALU_LITERAL_X, -      AMDGPU::ALU_LITERAL_Y, -      AMDGPU::ALU_LITERAL_Z, -      AMDGPU::ALU_LITERAL_W +      R600::ALU_LITERAL_X, +      R600::ALU_LITERAL_Y, +      R600::ALU_LITERAL_Z, +      R600::ALU_LITERAL_W      };      const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =          TII->getSrcs(MI);      for (const auto &Src:Srcs) { -      if (Src.first->getReg() != AMDGPU::ALU_LITERAL_X) +      if (Src.first->getReg() != R600::ALU_LITERAL_X)          continue;        int64_t Imm = Src.second;        std::vector<MachineOperand *>::iterator It = @@ -377,7 +377,7 @@ private:        // Get corresponding Operand        MachineOperand &Operand = MI.getOperand( -          TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal)); +          TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));        if (It != Lits.end()) {          // Reuse existing literal reg @@ -400,7 +400,7 @@ private:        unsigned LiteralPair0 = Literals[i];        unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;        InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), -          TII->get(AMDGPU::LITERALS)) +          TII->get(R600::LITERALS))            .addImm(LiteralPair0)            .addImm(LiteralPair1);      } @@ -442,7 +442,7 @@ private:        }        for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {          MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(), -            TII->get(AMDGPU::LITERALS)); +            TII->get(R600::LITERALS));          if (Literals[i]->isImm()) {              MILit.addImm(Literals[i]->getImm());          } else { @@ -471,7 +471,7 @@ private:                         unsigned &CfCount) {      CounterPropagateAddr(*Clause.first, CfCount);      MachineBasicBlock *BB = Clause.first->getParent(); -    BuildMI(BB, DL, TII->get(AMDGPU::FETCH_CLAUSE)).addImm(CfCount); +    BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);      for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {        BB->splice(InsertPos, BB, Clause.second[i]);      } @@ -483,7 +483,7 @@ private:      Clause.first->getOperand(0).setImm(0);      CounterPropagateAddr(*Clause.first, CfCount);      MachineBasicBlock *BB = Clause.first->getParent(); -    BuildMI(BB, DL, 
TII->get(AMDGPU::ALU_CLAUSE)).addImm(CfCount); +    BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);      for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {        BB->splice(InsertPos, BB, Clause.second[i]);      } @@ -540,34 +540,34 @@ public:          }          MachineBasicBlock::iterator MI = I; -        if (MI->getOpcode() != AMDGPU::ENDIF) +        if (MI->getOpcode() != R600::ENDIF)            LastAlu.back() = nullptr; -        if (MI->getOpcode() == AMDGPU::CF_ALU) +        if (MI->getOpcode() == R600::CF_ALU)            LastAlu.back() = &*MI;          I++;          bool RequiresWorkAround =              CFStack.requiresWorkAroundForInst(MI->getOpcode());          switch (MI->getOpcode()) { -        case AMDGPU::CF_ALU_PUSH_BEFORE: +        case R600::CF_ALU_PUSH_BEFORE:            if (RequiresWorkAround) {              LLVM_DEBUG(dbgs()                         << "Applying bug work-around for ALU_PUSH_BEFORE\n"); -            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG)) +            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))                  .addImm(CfCount + 1)                  .addImm(1); -            MI->setDesc(TII->get(AMDGPU::CF_ALU)); +            MI->setDesc(TII->get(R600::CF_ALU));              CfCount++; -            CFStack.pushBranch(AMDGPU::CF_PUSH_EG); +            CFStack.pushBranch(R600::CF_PUSH_EG);            } else -            CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE); +            CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);            LLVM_FALLTHROUGH; -        case AMDGPU::CF_ALU: +        case R600::CF_ALU:            I = MI;            AluClauses.push_back(MakeALUClause(MBB, I));            LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););            CfCount++;            break; -        case AMDGPU::WHILELOOP: { +        case R600::WHILELOOP: {            CFStack.pushLoop();            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),                getHWInstrDesc(CF_WHILE_LOOP)) @@ -580,7 +580,7 @@ public:            CfCount++;            break;          } -        case AMDGPU::ENDLOOP: { +        case R600::ENDLOOP: {            CFStack.popLoop();            std::pair<unsigned, std::set<MachineInstr *>> Pair =                std::move(LoopStack.back()); @@ -592,7 +592,7 @@ public:            CfCount++;            break;          } -        case AMDGPU::IF_PREDICATE_SET: { +        case R600::IF_PREDICATE_SET: {            LastAlu.push_back(nullptr);            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),                getHWInstrDesc(CF_JUMP)) @@ -604,7 +604,7 @@ public:            CfCount++;            break;          } -        case AMDGPU::ELSE: { +        case R600::ELSE: {            MachineInstr * JumpInst = IfThenElseStack.back();            IfThenElseStack.pop_back();            CounterPropagateAddr(*JumpInst, CfCount); @@ -618,7 +618,7 @@ public:            CfCount++;            break;          } -        case AMDGPU::ENDIF: { +        case R600::ENDIF: {            CFStack.popBranch();            if (LastAlu.back()) {              ToPopAfter.push_back(LastAlu.back()); @@ -640,7 +640,7 @@ public:            MI->eraseFromParent();            break;          } -        case AMDGPU::BREAK: { +        case R600::BREAK: {            CfCount ++;            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),                getHWInstrDesc(CF_LOOP_BREAK)) @@ -649,7 +649,7 @@ public:            MI->eraseFromParent();            break;          } -  
      case AMDGPU::CONTINUE: { +        case R600::CONTINUE: {            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),                getHWInstrDesc(CF_LOOP_CONTINUE))                .addImm(0); @@ -658,12 +658,12 @@ public:            CfCount++;            break;          } -        case AMDGPU::RETURN: { +        case R600::RETURN: {            DebugLoc DL = MBB.findDebugLoc(MI);            BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));            CfCount++;            if (CfCount % 2) { -            BuildMI(MBB, I, DL, TII->get(AMDGPU::PAD)); +            BuildMI(MBB, I, DL, TII->get(R600::PAD));              CfCount++;            }            MI->eraseFromParent(); @@ -684,7 +684,7 @@ public:        for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {          MachineInstr *Alu = ToPopAfter[i];          BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu), -            TII->get(AMDGPU::CF_ALU_POP_AFTER)) +            TII->get(R600::CF_ALU_POP_AFTER))              .addImm(Alu->getOperand(0).getImm())              .addImm(Alu->getOperand(1).getImm())              .addImm(Alu->getOperand(2).getImm()) diff --git a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp index bfd861c6140..1683fe6c9a5 100644 --- a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -52,12 +52,12 @@ private:    unsigned OccupiedDwords(MachineInstr &MI) const {      switch (MI.getOpcode()) { -    case AMDGPU::INTERP_PAIR_XY: -    case AMDGPU::INTERP_PAIR_ZW: -    case AMDGPU::INTERP_VEC_LOAD: -    case AMDGPU::DOT_4: +    case R600::INTERP_PAIR_XY: +    case R600::INTERP_PAIR_ZW: +    case R600::INTERP_VEC_LOAD: +    case R600::DOT_4:        return 4; -    case AMDGPU::KILL: +    case R600::KILL:        return 0;      default:        break; @@ -77,7 +77,7 @@ private:                                      E = MI.operands_end();           It != E; ++It) {        MachineOperand &MO = *It; -      if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) +      if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)          ++NumLiteral;      }      return 1 + NumLiteral; @@ -89,12 +89,12 @@ private:      if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))        return true;      switch (MI.getOpcode()) { -    case AMDGPU::PRED_X: -    case AMDGPU::INTERP_PAIR_XY: -    case AMDGPU::INTERP_PAIR_ZW: -    case AMDGPU::INTERP_VEC_LOAD: -    case AMDGPU::COPY: -    case AMDGPU::DOT_4: +    case R600::PRED_X: +    case R600::INTERP_PAIR_XY: +    case R600::INTERP_PAIR_ZW: +    case R600::INTERP_VEC_LOAD: +    case R600::COPY: +    case R600::DOT_4:        return true;      default:        return false; @@ -103,9 +103,9 @@ private:    bool IsTrivialInst(MachineInstr &MI) const {      switch (MI.getOpcode()) { -    case AMDGPU::KILL: -    case AMDGPU::RETURN: -    case AMDGPU::IMPLICIT_DEF: +    case R600::KILL: +    case R600::RETURN: +    case R600::IMPLICIT_DEF:        return true;      default:        return false; @@ -132,16 +132,16 @@ private:                         bool UpdateInstr = true) const {      std::vector<std::pair<unsigned, unsigned>> UsedKCache; -    if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != AMDGPU::DOT_4) +    if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4)        return true;      const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =          TII->getSrcs(MI);      assert( -        (TII->isALUInstr(MI.getOpcode()) || 
MI.getOpcode() == AMDGPU::DOT_4) && +        (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&          "Can't assign Const");      for (unsigned i = 0, n = Consts.size(); i < n; ++i) { -      if (Consts[i].first->getReg() != AMDGPU::ALU_CONST) +      if (Consts[i].first->getReg() != R600::ALU_CONST)          continue;        unsigned Sel = Consts[i].second;        unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; @@ -172,16 +172,16 @@ private:        return true;      for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) { -      if (Consts[i].first->getReg() != AMDGPU::ALU_CONST) +      if (Consts[i].first->getReg() != R600::ALU_CONST)          continue;        switch(UsedKCache[j].first) {        case 0:          Consts[i].first->setReg( -            AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second)); +            R600::R600_KC0RegClass.getRegister(UsedKCache[j].second));          break;        case 1:          Consts[i].first->setReg( -            AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second)); +            R600::R600_KC1RegClass.getRegister(UsedKCache[j].second));          break;        default:          llvm_unreachable("Wrong Cache Line"); @@ -253,7 +253,7 @@ private:          break;        if (AluInstCount > TII->getMaxAlusPerClause())          break; -      if (I->getOpcode() == AMDGPU::PRED_X) { +      if (I->getOpcode() == R600::PRED_X) {          // We put PRED_X in its own clause to ensure that ifcvt won't create          // clauses with more than 128 insts.          // IfCvt is indeed checking that "then" and "else" branches of an if @@ -289,7 +289,7 @@ private:        AluInstCount += OccupiedDwords(*I);      }      unsigned Opcode = PushBeforeModifier ? -        AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU; +        R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU;      BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))      // We don't use the ADDR field until R600ControlFlowFinalizer pass, where      // it is safe to assume it is 0. 
However if we always put 0 here, the ifcvt @@ -322,7 +322,7 @@ public:                                                      BB != BB_E; ++BB) {        MachineBasicBlock &MBB = *BB;        MachineBasicBlock::iterator I = MBB.begin(); -      if (I != MBB.end() && I->getOpcode() == AMDGPU::CF_ALU) +      if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)          continue; // BB was already parsed        for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {          if (isALU(*I)) { diff --git a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index 74d88b09415..b924ff019dd 100644 --- a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -96,16 +96,16 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {        // Expand LDS_*_RET instructions        if (TII->isLDSRetInstr(MI.getOpcode())) { -        int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); +        int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);          assert(DstIdx != -1);          MachineOperand &DstOp = MI.getOperand(DstIdx);          MachineInstr *Mov = TII->buildMovInstr(&MBB, I, -                                               DstOp.getReg(), AMDGPU::OQAP); -        DstOp.setReg(AMDGPU::OQAP); +                                               DstOp.getReg(), R600::OQAP); +        DstOp.setReg(R600::OQAP);          int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(), -                                           AMDGPU::OpName::pred_sel); +                                           R600::OpName::pred_sel);          int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(), -                                           AMDGPU::OpName::pred_sel); +                                           R600::OpName::pred_sel);          // Copy the pred_sel bit          Mov->getOperand(MovPredSelIdx).setReg(              MI.getOperand(LDSPredSelIdx).getReg()); @@ -114,7 +114,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {        switch (MI.getOpcode()) {        default: break;        // Expand PRED_X to one of the PRED_SET instructions. -      case AMDGPU::PRED_X: { +      case R600::PRED_X: {          uint64_t Flags = MI.getOperand(3).getImm();          // The native opcode used by PRED_X is stored as an immediate in the          // third operand. 
@@ -122,17 +122,18 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {                                              MI.getOperand(2).getImm(), // opcode                                              MI.getOperand(0).getReg(), // dst                                              MI.getOperand(1).getReg(), // src0 -                                            AMDGPU::ZERO);             // src1 +                                            R600::ZERO);             // src1          TII->addFlag(*PredSet, 0, MO_FLAG_MASK);          if (Flags & MO_FLAG_PUSH) { -          TII->setImmOperand(*PredSet, AMDGPU::OpName::update_exec_mask, 1); +          TII->setImmOperand(*PredSet, R600::OpName::update_exec_mask, 1);          } else { -          TII->setImmOperand(*PredSet, AMDGPU::OpName::update_pred, 1); +          TII->setImmOperand(*PredSet, R600::OpName::update_pred, 1);          }          MI.eraseFromParent();          continue;          } -      case AMDGPU::DOT_4: { +      case R600::DOT_4: { +          const R600RegisterInfo &TRI = TII->getRegisterInfo();          unsigned DstReg = MI.getOperand(0).getReg(); @@ -141,7 +142,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {          for (unsigned Chan = 0; Chan < 4; ++Chan) {            bool Mask = (Chan != TRI.getHWRegChan(DstReg));            unsigned SubDstReg = -              AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); +              R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);            MachineInstr *BMI =                TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg);            if (Chan > 0) { @@ -156,10 +157,10 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {            // While not strictly necessary from hw point of view, we force            // all src operands of a dot4 inst to belong to the same slot.            unsigned Src0 = BMI->getOperand( -              TII->getOperandIdx(Opcode, AMDGPU::OpName::src0)) +              TII->getOperandIdx(Opcode, R600::OpName::src0))                .getReg();            unsigned Src1 = BMI->getOperand( -              TII->getOperandIdx(Opcode, AMDGPU::OpName::src1)) +              TII->getOperandIdx(Opcode, R600::OpName::src1))                .getReg();            (void) Src0;            (void) Src1; @@ -206,14 +207,14 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {        // T0_W = CUBE T1_Y, T1_Z        for (unsigned Chan = 0; Chan < 4; Chan++) {          unsigned DstReg = MI.getOperand( -                            TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg(); +                            TII->getOperandIdx(MI, R600::OpName::dst)).getReg();          unsigned Src0 = MI.getOperand( -                           TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg(); +                           TII->getOperandIdx(MI, R600::OpName::src0)).getReg();          unsigned Src1 = 0;          // Determine the correct source registers          if (!IsCube) { -          int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1); +          int Src1Idx = TII->getOperandIdx(MI, R600::OpName::src1);            if (Src1Idx != -1) {              Src1 = MI.getOperand(Src1Idx).getReg();            } @@ -241,7 +242,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {            // the current Channel.            
Mask = (Chan != TRI.getHWRegChan(DstReg));            unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; -          DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); +          DstReg = R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);          }          // Set the IsLast bit @@ -250,11 +251,11 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {          // Add the new instruction          unsigned Opcode = MI.getOpcode();          switch (Opcode) { -        case AMDGPU::CUBE_r600_pseudo: -          Opcode = AMDGPU::CUBE_r600_real; +        case R600::CUBE_r600_pseudo: +          Opcode = R600::CUBE_r600_real;            break; -        case AMDGPU::CUBE_eg_pseudo: -          Opcode = AMDGPU::CUBE_eg_real; +        case R600::CUBE_eg_pseudo: +          Opcode = R600::CUBE_eg_real;            break;          default:            break; @@ -271,12 +272,12 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {          if (NotLast) {            TII->addFlag(*NewMI, 0, MO_FLAG_NOT_LAST);          } -        SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::clamp); -        SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::literal); -        SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_abs); -        SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_abs); -        SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_neg); -        SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_neg); +        SetFlagInNewMI(NewMI, &MI, R600::OpName::clamp); +        SetFlagInNewMI(NewMI, &MI, R600::OpName::literal); +        SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_abs); +        SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_abs); +        SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_neg); +        SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_neg);        }        MI.eraseFromParent();      } diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 8ca29024552..0c31deb8c18 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -14,7 +14,6 @@  #include "R600ISelLowering.h"  #include "AMDGPUFrameLowering.h" -#include "AMDGPUIntrinsicInfo.h"  #include "AMDGPUSubtarget.h"  #include "R600Defines.h"  #include "R600FrameLowering.h" @@ -51,17 +50,31 @@  using namespace llvm; +static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT, +                            CCValAssign::LocInfo LocInfo, +                            ISD::ArgFlagsTy ArgFlags, CCState &State) { +  MachineFunction &MF = State.getMachineFunction(); +  AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>(); + +  uint64_t Offset = MFI->allocateKernArg(LocVT.getStoreSize(), +                                         ArgFlags.getOrigAlign()); +  State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo)); +  return true; +} + +#include "R600GenCallingConv.inc" +  R600TargetLowering::R600TargetLowering(const TargetMachine &TM,                                         const R600Subtarget &STI) -    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) { -  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); -  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); -  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass); -  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass); -  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); -  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); +    : 
AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) { +  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass); +  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass); +  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass); +  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass); +  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass); +  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass); -  computeRegisterProperties(STI.getRegisterInfo()); +  computeRegisterProperties(Subtarget->getRegisterInfo());    // Legalize loads and stores to the private address space.    setOperationAction(ISD::LOAD, MVT::i32, Custom); @@ -148,6 +161,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,    setOperationAction(ISD::FSUB, MVT::f32, Expand); +  setOperationAction(ISD::FCEIL, MVT::f64, Custom); +  setOperationAction(ISD::FTRUNC, MVT::f64, Custom); +  setOperationAction(ISD::FRINT, MVT::f64, Custom); +  setOperationAction(ISD::FFLOOR, MVT::f64, Custom); +    setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);    setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); @@ -216,6 +234,34 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,      setOperationAction(ISD::FMA, MVT::f32, Expand);      setOperationAction(ISD::FMA, MVT::f64, Expand);    } +  +  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we +  // need it for R600. +  if (!Subtarget->hasFP32Denormals()) +    setOperationAction(ISD::FMAD, MVT::f32, Legal); + +  if (!Subtarget->hasBFI()) { +    // fcopysign can be done in a single instruction with BFI. +    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); +    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); +  } + +  if (!Subtarget->hasBCNT(32)) +    setOperationAction(ISD::CTPOP, MVT::i32, Expand); + +  if (!Subtarget->hasBCNT(64)) +    setOperationAction(ISD::CTPOP, MVT::i64, Expand); + +  if (Subtarget->hasFFBH()) +    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); + +  if (Subtarget->hasFFBL()) +    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); + +  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we +  // need it for R600. +  if (Subtarget->hasBFE()) +    setHasExtractBitsInsn(true);    setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); @@ -246,14 +292,10 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,    setTargetDAGCombine(ISD::LOAD);  } -const R600Subtarget *R600TargetLowering::getSubtarget() const { -  return static_cast<const R600Subtarget *>(Subtarget); -} -  static inline bool isEOP(MachineBasicBlock::iterator I) {    if (std::next(I) == I->getParent()->end())      return false; -  return std::next(I)->getOpcode() == AMDGPU::RETURN; +  return std::next(I)->getOpcode() == R600::RETURN;  }  MachineBasicBlock * @@ -262,24 +304,24 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,    MachineFunction *MF = BB->getParent();    MachineRegisterInfo &MRI = MF->getRegInfo();    MachineBasicBlock::iterator I = MI; -  const R600InstrInfo *TII = getSubtarget()->getInstrInfo(); +  const R600InstrInfo *TII = Subtarget->getInstrInfo();    switch (MI.getOpcode()) {    default:      // Replace LDS_*_RET instruction that don't have any uses with the      // equivalent LDS_*_NORET instruction.      
if (TII->isLDSRetInstr(MI.getOpcode())) { -      int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); +      int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);        assert(DstIdx != -1);        MachineInstrBuilder NewMI;        // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add        //        LDS_1A2D support and remove this special case.        if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) || -          MI.getOpcode() == AMDGPU::LDS_CMPST_RET) +          MI.getOpcode() == R600::LDS_CMPST_RET)          return BB;        NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), -                      TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode()))); +                      TII->get(R600::getLDSNoRetOp(MI.getOpcode())));        for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {          NewMI.add(MI.getOperand(i));        } @@ -288,23 +330,23 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,      }      break; -  case AMDGPU::FABS_R600: { +  case R600::FABS_R600: {      MachineInstr *NewMI = TII->buildDefaultInstruction( -        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(), +        *BB, I, R600::MOV, MI.getOperand(0).getReg(),          MI.getOperand(1).getReg());      TII->addFlag(*NewMI, 0, MO_FLAG_ABS);      break;    } -  case AMDGPU::FNEG_R600: { +  case R600::FNEG_R600: {      MachineInstr *NewMI = TII->buildDefaultInstruction( -        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(), +        *BB, I, R600::MOV, MI.getOperand(0).getReg(),          MI.getOperand(1).getReg());      TII->addFlag(*NewMI, 0, MO_FLAG_NEG);      break;    } -  case AMDGPU::MASK_WRITE: { +  case R600::MASK_WRITE: {      unsigned maskedRegister = MI.getOperand(0).getReg();      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); @@ -312,7 +354,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,      break;    } -  case AMDGPU::MOV_IMM_F32: +  case R600::MOV_IMM_F32:      TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)                                                              .getFPImm()                                                              ->getValueAPF() @@ -320,39 +362,39 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,                                                              .getZExtValue());      break; -  case AMDGPU::MOV_IMM_I32: +  case R600::MOV_IMM_I32:      TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),                       MI.getOperand(1).getImm());      break; -  case AMDGPU::MOV_IMM_GLOBAL_ADDR: { +  case R600::MOV_IMM_GLOBAL_ADDR: {      //TODO: Perhaps combine this instruction with the next if possible      auto MIB = TII->buildDefaultInstruction( -        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X); -    int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal); +        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X); +    int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);      //TODO: Ugh this is rather ugly      MIB->getOperand(Idx) = MI.getOperand(1);      break;    } -  case AMDGPU::CONST_COPY: { +  case R600::CONST_COPY: {      MachineInstr *NewMI = TII->buildDefaultInstruction( -        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST); -    TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel, +        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST); +    
TII->setImmOperand(*NewMI, R600::OpName::src0_sel,                         MI.getOperand(1).getImm());      break;    } -  case AMDGPU::RAT_WRITE_CACHELESS_32_eg: -  case AMDGPU::RAT_WRITE_CACHELESS_64_eg: -  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: +  case R600::RAT_WRITE_CACHELESS_32_eg: +  case R600::RAT_WRITE_CACHELESS_64_eg: +  case R600::RAT_WRITE_CACHELESS_128_eg:      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))          .add(MI.getOperand(0))          .add(MI.getOperand(1))          .addImm(isEOP(I)); // Set End of program bit      break; -  case AMDGPU::RAT_STORE_TYPED_eg: +  case R600::RAT_STORE_TYPED_eg:      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))          .add(MI.getOperand(0))          .add(MI.getOperand(1)) @@ -360,49 +402,49 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,          .addImm(isEOP(I)); // Set End of program bit      break; -  case AMDGPU::BRANCH: -    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) +  case R600::BRANCH: +    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))          .add(MI.getOperand(0));      break; -  case AMDGPU::BRANCH_COND_f32: { +  case R600::BRANCH_COND_f32: {      MachineInstr *NewMI = -        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), -                AMDGPU::PREDICATE_BIT) +        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X), +                R600::PREDICATE_BIT)              .add(MI.getOperand(1)) -            .addImm(AMDGPU::PRED_SETNE) +            .addImm(R600::PRED_SETNE)              .addImm(0); // Flags      TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); -    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND)) +    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))          .add(MI.getOperand(0)) -        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); +        .addReg(R600::PREDICATE_BIT, RegState::Kill);      break;    } -  case AMDGPU::BRANCH_COND_i32: { +  case R600::BRANCH_COND_i32: {      MachineInstr *NewMI = -        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), -                AMDGPU::PREDICATE_BIT) +        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X), +                R600::PREDICATE_BIT)              .add(MI.getOperand(1)) -            .addImm(AMDGPU::PRED_SETNE_INT) +            .addImm(R600::PRED_SETNE_INT)              .addImm(0); // Flags      TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); -    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND)) +    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))          .add(MI.getOperand(0)) -        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); +        .addReg(R600::PREDICATE_BIT, RegState::Kill);      break;    } -  case AMDGPU::EG_ExportSwz: -  case AMDGPU::R600_ExportSwz: { +  case R600::EG_ExportSwz: +  case R600::R600_ExportSwz: {      // Instruction is left unmodified if its not the last one of its type      bool isLastInstructionOfItsType = true;      unsigned InstExportType = MI.getOperand(1).getImm();      for (MachineBasicBlock::iterator NextExportInst = std::next(I),           EndBlock = BB->end(); NextExportInst != EndBlock;           NextExportInst = std::next(NextExportInst)) { -      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz || -          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) { +      if (NextExportInst->getOpcode() == R600::EG_ExportSwz || +          NextExportInst->getOpcode() == R600::R600_ExportSwz) {          unsigned 
CurrentInstExportType = NextExportInst->getOperand(1)              .getImm();          if (CurrentInstExportType == InstExportType) { @@ -414,7 +456,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,      bool EOP = isEOP(I);      if (!EOP && !isLastInstructionOfItsType)        return BB; -    unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40; +    unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))          .add(MI.getOperand(0))          .add(MI.getOperand(1)) @@ -427,7 +469,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,          .addImm(EOP);      break;    } -  case AMDGPU::RETURN: { +  case R600::RETURN: {      return BB;    }    } @@ -583,23 +625,23 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const        return LowerImplicitParameter(DAG, VT, DL, 8);      case Intrinsic::r600_read_tgid_x: -      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, -                                     AMDGPU::T1_X, VT); +      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, +                                     R600::T1_X, VT);      case Intrinsic::r600_read_tgid_y: -      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, -                                     AMDGPU::T1_Y, VT); +      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, +                                     R600::T1_Y, VT);      case Intrinsic::r600_read_tgid_z: -      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, -                                     AMDGPU::T1_Z, VT); +      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, +                                     R600::T1_Z, VT);      case Intrinsic::r600_read_tidig_x: -      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, -                                     AMDGPU::T0_X, VT); +      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, +                                     R600::T0_X, VT);      case Intrinsic::r600_read_tidig_y: -      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, -                                     AMDGPU::T0_Y, VT); +      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, +                                     R600::T0_Y, VT);      case Intrinsic::r600_read_tidig_z: -      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass, -                                     AMDGPU::T0_Z, VT); +      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, +                                     R600::T0_Z, VT);      case Intrinsic::r600_recipsqrt_ieee:        return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); @@ -1521,7 +1563,7 @@ SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {  SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,                                              SelectionDAG &DAG) const {    MachineFunction &MF = DAG.getMachineFunction(); -  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering(); +  const R600FrameLowering *TFL = Subtarget->getFrameLowering();    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op); @@ -1533,6 +1575,28 @@ SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,                           Op.getValueType());  } +CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC, +                                                  
bool IsVarArg) const { +  switch (CC) { +  case CallingConv::AMDGPU_KERNEL: +  case CallingConv::SPIR_KERNEL: +  case CallingConv::C: +  case CallingConv::Fast: +  case CallingConv::Cold: +    return CC_R600_Kernel; +  case CallingConv::AMDGPU_VS: +  case CallingConv::AMDGPU_GS: +  case CallingConv::AMDGPU_PS: +  case CallingConv::AMDGPU_CS: +  case CallingConv::AMDGPU_HS: +  case CallingConv::AMDGPU_ES: +  case CallingConv::AMDGPU_LS: +    return CC_R600; +  default: +    report_fatal_error("Unsupported calling convention."); +  } +} +  /// XXX Only kernel functions are supported, so we can assume for now that  /// every function is a kernel function, but in the future we should use  /// separate calling conventions for kernel and non-kernel functions. @@ -1563,7 +1627,7 @@ SDValue R600TargetLowering::LowerFormalArguments(      }      if (AMDGPU::isShader(CallConv)) { -      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass); +      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);        SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);        InVals.push_back(Register);        continue; @@ -1594,7 +1658,7 @@ SDValue R600TargetLowering::LowerFormalArguments(      unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();      unsigned PartOffset = VA.getLocMemOffset(); -    unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF.getFunction()) + +    unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +                        VA.getLocMemOffset();      MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase); @@ -1981,26 +2045,26 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,                                       SDValue &Src, SDValue &Neg, SDValue &Abs,                                       SDValue &Sel, SDValue &Imm,                                       SelectionDAG &DAG) const { -  const R600InstrInfo *TII = getSubtarget()->getInstrInfo(); +  const R600InstrInfo *TII = Subtarget->getInstrInfo();    if (!Src.isMachineOpcode())      return false;    switch (Src.getMachineOpcode()) { -  case AMDGPU::FNEG_R600: +  case R600::FNEG_R600:      if (!Neg.getNode())        return false;      Src = Src.getOperand(0);      Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);      return true; -  case AMDGPU::FABS_R600: +  case R600::FABS_R600:      if (!Abs.getNode())        return false;      Src = Src.getOperand(0);      Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);      return true; -  case AMDGPU::CONST_COPY: { +  case R600::CONST_COPY: {      unsigned Opcode = ParentNode->getMachineOpcode(); -    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; +    bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;      if (!Sel.getNode())        return false; @@ -2011,17 +2075,17 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,      // Gather constants values      int SrcIndices[] = { -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), -   
   TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) +      TII->getOperandIdx(Opcode, R600::OpName::src0), +      TII->getOperandIdx(Opcode, R600::OpName::src1), +      TII->getOperandIdx(Opcode, R600::OpName::src2), +      TII->getOperandIdx(Opcode, R600::OpName::src0_X), +      TII->getOperandIdx(Opcode, R600::OpName::src0_Y), +      TII->getOperandIdx(Opcode, R600::OpName::src0_Z), +      TII->getOperandIdx(Opcode, R600::OpName::src0_W), +      TII->getOperandIdx(Opcode, R600::OpName::src1_X), +      TII->getOperandIdx(Opcode, R600::OpName::src1_Y), +      TII->getOperandIdx(Opcode, R600::OpName::src1_Z), +      TII->getOperandIdx(Opcode, R600::OpName::src1_W)      };      std::vector<unsigned> Consts;      for (int OtherSrcIdx : SrcIndices) { @@ -2034,7 +2098,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,        }        if (RegisterSDNode *Reg =            dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) { -        if (Reg->getReg() == AMDGPU::ALU_CONST) { +        if (Reg->getReg() == R600::ALU_CONST) {            ConstantSDNode *Cst              = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));            Consts.push_back(Cst->getZExtValue()); @@ -2049,30 +2113,30 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,      }      Sel = CstOffset; -    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32); +    Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);      return true;    } -  case AMDGPU::MOV_IMM_GLOBAL_ADDR: +  case R600::MOV_IMM_GLOBAL_ADDR:      // Check if the Imm slot is used. Taken from below.      if (cast<ConstantSDNode>(Imm)->getZExtValue())        return false;      Imm = Src.getOperand(0); -    Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32); +    Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);      return true; -  case AMDGPU::MOV_IMM_I32: -  case AMDGPU::MOV_IMM_F32: { -    unsigned ImmReg = AMDGPU::ALU_LITERAL_X; +  case R600::MOV_IMM_I32: +  case R600::MOV_IMM_F32: { +    unsigned ImmReg = R600::ALU_LITERAL_X;      uint64_t ImmValue = 0; -    if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) { +    if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {        ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));        float FloatValue = FPC->getValueAPF().convertToFloat();        if (FloatValue == 0.0) { -        ImmReg = AMDGPU::ZERO; +        ImmReg = R600::ZERO;        } else if (FloatValue == 0.5) { -        ImmReg = AMDGPU::HALF; +        ImmReg = R600::HALF;        } else if (FloatValue == 1.0) { -        ImmReg = AMDGPU::ONE; +        ImmReg = R600::ONE;        } else {          ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();        } @@ -2080,9 +2144,9 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,        ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));        uint64_t Value = C->getZExtValue();        if (Value == 0) { -        ImmReg = AMDGPU::ZERO; +        ImmReg = R600::ZERO;        } else if (Value == 1) { -        ImmReg = AMDGPU::ONE_INT; +        ImmReg = R600::ONE_INT;        } else {          ImmValue = Value;        } @@ -2091,7 +2155,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,      // Check that we aren't already using an immediate.      // XXX: It's possible for an instruction to have more than one      // immediate operand, but this is not supported yet. 
-    if (ImmReg == AMDGPU::ALU_LITERAL_X) { +    if (ImmReg == R600::ALU_LITERAL_X) {        if (!Imm.getNode())          return false;        ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm); @@ -2111,7 +2175,7 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,  /// Fold the instructions after selecting them  SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,                                              SelectionDAG &DAG) const { -  const R600InstrInfo *TII = getSubtarget()->getInstrInfo(); +  const R600InstrInfo *TII = Subtarget->getInstrInfo();    if (!Node->isMachineOpcode())      return Node; @@ -2120,36 +2184,36 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,    std::vector<SDValue> Ops(Node->op_begin(), Node->op_end()); -  if (Opcode == AMDGPU::DOT_4) { +  if (Opcode == R600::DOT_4) {      int OperandIdx[] = { -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) +      TII->getOperandIdx(Opcode, R600::OpName::src0_X), +      TII->getOperandIdx(Opcode, R600::OpName::src0_Y), +      TII->getOperandIdx(Opcode, R600::OpName::src0_Z), +      TII->getOperandIdx(Opcode, R600::OpName::src0_W), +      TII->getOperandIdx(Opcode, R600::OpName::src1_X), +      TII->getOperandIdx(Opcode, R600::OpName::src1_Y), +      TII->getOperandIdx(Opcode, R600::OpName::src1_Z), +      TII->getOperandIdx(Opcode, R600::OpName::src1_W)          };      int NegIdx[] = { -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W) +      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X), +      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y), +      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z), +      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W), +      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X), +      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y), +      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z), +      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)      };      int AbsIdx[] = { -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W) +      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X), +      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y), +      TII->getOperandIdx(Opcode, 
R600::OpName::src0_abs_Z), +      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W), +      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X), +      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y), +      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z), +      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)      };      for (unsigned i = 0; i < 8; i++) {        if (OperandIdx[i] < 0) @@ -2157,7 +2221,7 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,        SDValue &Src = Ops[OperandIdx[i] - 1];        SDValue &Neg = Ops[NegIdx[i] - 1];        SDValue &Abs = Ops[AbsIdx[i] - 1]; -      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; +      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;        int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);        if (HasDst)          SelIdx--; @@ -2165,7 +2229,7 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,        if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))          return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);      } -  } else if (Opcode == AMDGPU::REG_SEQUENCE) { +  } else if (Opcode == R600::REG_SEQUENCE) {      for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {        SDValue &Src = Ops[i];        if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG)) @@ -2175,18 +2239,18 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,      if (!TII->hasInstrModifiers(Opcode))        return Node;      int OperandIdx[] = { -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2) +      TII->getOperandIdx(Opcode, R600::OpName::src0), +      TII->getOperandIdx(Opcode, R600::OpName::src1), +      TII->getOperandIdx(Opcode, R600::OpName::src2)      };      int NegIdx[] = { -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg) +      TII->getOperandIdx(Opcode, R600::OpName::src0_neg), +      TII->getOperandIdx(Opcode, R600::OpName::src1_neg), +      TII->getOperandIdx(Opcode, R600::OpName::src2_neg)      };      int AbsIdx[] = { -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs), -      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs), +      TII->getOperandIdx(Opcode, R600::OpName::src0_abs), +      TII->getOperandIdx(Opcode, R600::OpName::src1_abs),        -1      };      for (unsigned i = 0; i < 3; i++) { @@ -2196,9 +2260,9 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,        SDValue &Neg = Ops[NegIdx[i] - 1];        SDValue FakeAbs;        SDValue &Abs = (AbsIdx[i] > -1) ? 
Ops[AbsIdx[i] - 1] : FakeAbs; -      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1; +      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;        int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]); -      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal); +      int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);        if (HasDst) {          SelIdx--;          ImmIdx--; diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h index cc55a414139..907d1f10e15 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h @@ -23,6 +23,8 @@ class R600InstrInfo;  class R600Subtarget;  class R600TargetLowering final : public AMDGPUTargetLowering { + +  const R600Subtarget *Subtarget;  public:    R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI); @@ -36,6 +38,7 @@ public:    void ReplaceNodeResults(SDNode * N,                            SmallVectorImpl<SDValue> &Results,                            SelectionDAG &DAG) const override; +  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;    SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,                                 bool isVarArg,                                 const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/llvm/lib/Target/AMDGPU/R600InstrFormats.td b/llvm/lib/Target/AMDGPU/R600InstrFormats.td index 61106ed42e6..85d0b2d535b 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrFormats.td +++ b/llvm/lib/Target/AMDGPU/R600InstrFormats.td @@ -41,7 +41,7 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,    bit LDS_1A2D = 0;    let SubtargetPredicate = isR600toCayman; -  let Namespace = "AMDGPU"; +  let Namespace = "R600";    let OutOperandList = outs;    let InOperandList = ins;    let AsmString = asm; diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp index 4864933390d..0afea658fa2 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -45,10 +45,15 @@  using namespace llvm;  #define GET_INSTRINFO_CTOR_DTOR -#include "AMDGPUGenDFAPacketizer.inc" +#include "R600GenDFAPacketizer.inc" + +#define GET_INSTRINFO_CTOR_DTOR +#define GET_INSTRMAP_INFO +#define GET_INSTRINFO_NAMED_OPS +#include "R600GenInstrInfo.inc"  R600InstrInfo::R600InstrInfo(const R600Subtarget &ST) -  : AMDGPUInstrInfo(ST), RI(), ST(ST) {} +  : R600GenInstrInfo(-1, -1), RI(), ST(ST) {}  bool R600InstrInfo::isVector(const MachineInstr &MI) const {    return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; @@ -59,31 +64,31 @@ void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,                                  const DebugLoc &DL, unsigned DestReg,                                  unsigned SrcReg, bool KillSrc) const {    unsigned VectorComponents = 0; -  if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) || -      AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) && -      (AMDGPU::R600_Reg128RegClass.contains(SrcReg) || -       AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) { +  if ((R600::R600_Reg128RegClass.contains(DestReg) || +      R600::R600_Reg128VerticalRegClass.contains(DestReg)) && +      (R600::R600_Reg128RegClass.contains(SrcReg) || +       R600::R600_Reg128VerticalRegClass.contains(SrcReg))) {      VectorComponents = 4; -  } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) || -            
AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) && -            (AMDGPU::R600_Reg64RegClass.contains(SrcReg) || -             AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) { +  } else if((R600::R600_Reg64RegClass.contains(DestReg) || +            R600::R600_Reg64VerticalRegClass.contains(DestReg)) && +            (R600::R600_Reg64RegClass.contains(SrcReg) || +             R600::R600_Reg64VerticalRegClass.contains(SrcReg))) {      VectorComponents = 2;    }    if (VectorComponents > 0) {      for (unsigned I = 0; I < VectorComponents; I++) {        unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(I); -      buildDefaultInstruction(MBB, MI, AMDGPU::MOV, +      buildDefaultInstruction(MBB, MI, R600::MOV,                                RI.getSubReg(DestReg, SubRegIndex),                                RI.getSubReg(SrcReg, SubRegIndex))                                .addReg(DestReg,                                        RegState::Define | RegState::Implicit);      }    } else { -    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV, +    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, R600::MOV,                                                    DestReg, SrcReg); -    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0)) +    NewMI->getOperand(getOperandIdx(*NewMI, R600::OpName::src0))                                      .setIsKill(KillSrc);    }  } @@ -104,9 +109,9 @@ bool R600InstrInfo::isMov(unsigned Opcode) const {    switch(Opcode) {    default:      return false; -  case AMDGPU::MOV: -  case AMDGPU::MOV_IMM_F32: -  case AMDGPU::MOV_IMM_I32: +  case R600::MOV: +  case R600::MOV_IMM_F32: +  case R600::MOV_IMM_I32:      return true;    }  } @@ -118,10 +123,10 @@ bool R600InstrInfo::isReductionOp(unsigned Opcode) const {  bool R600InstrInfo::isCubeOp(unsigned Opcode) const {    switch(Opcode) {      default: return false; -    case AMDGPU::CUBE_r600_pseudo: -    case AMDGPU::CUBE_r600_real: -    case AMDGPU::CUBE_eg_pseudo: -    case AMDGPU::CUBE_eg_real: +    case R600::CUBE_r600_pseudo: +    case R600::CUBE_r600_real: +    case R600::CUBE_eg_pseudo: +    case R600::CUBE_eg_real:        return true;    }  } @@ -149,7 +154,7 @@ bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {  }  bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const { -  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1; +  return isLDSInstr(Opcode) && getOperandIdx(Opcode, R600::OpName::dst) != -1;  }  bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const { @@ -158,12 +163,12 @@ bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {    if (isVector(MI) || isCubeOp(MI.getOpcode()))      return true;    switch (MI.getOpcode()) { -  case AMDGPU::PRED_X: -  case AMDGPU::INTERP_PAIR_XY: -  case AMDGPU::INTERP_PAIR_ZW: -  case AMDGPU::INTERP_VEC_LOAD: -  case AMDGPU::COPY: -  case AMDGPU::DOT_4: +  case R600::PRED_X: +  case R600::INTERP_PAIR_XY: +  case R600::INTERP_PAIR_ZW: +  case R600::INTERP_VEC_LOAD: +  case R600::COPY: +  case R600::DOT_4:      return true;    default:      return false; @@ -173,7 +178,7 @@ bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {  bool R600InstrInfo::isTransOnly(unsigned Opcode) const {    if (ST.hasCaymanISA())      return false; -  return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU); +  return (get(Opcode).getSchedClass() == R600::Sched::TransALU);  }  bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const { @@ -181,7 +186,7 
@@ bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const {  }  bool R600InstrInfo::isVectorOnly(unsigned Opcode) const { -  return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU); +  return (get(Opcode).getSchedClass() == R600::Sched::VecALU);  }  bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const { @@ -215,8 +220,8 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const {  bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {    switch (Opcode) { -  case AMDGPU::KILLGT: -  case AMDGPU::GROUP_BARRIER: +  case R600::KILLGT: +  case R600::GROUP_BARRIER:      return true;    default:      return false; @@ -224,11 +229,11 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {  }  bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const { -  return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1; +  return MI.findRegisterUseOperandIdx(R600::AR_X) != -1;  }  bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const { -  return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1; +  return MI.findRegisterDefOperandIdx(R600::AR_X) != -1;  }  bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const { @@ -242,7 +247,7 @@ bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {          TargetRegisterInfo::isVirtualRegister(I->getReg()))        continue; -    if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg())) +    if (R600::R600_LDS_SRC_REGRegClass.contains(I->getReg()))        return true;    }    return false; @@ -250,17 +255,17 @@ bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {  int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {    static const unsigned SrcSelTable[][2] = { -    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, -    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, -    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, -    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, -    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, -    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, -    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, -    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, -    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, -    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, -    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W} +    {R600::OpName::src0, R600::OpName::src0_sel}, +    {R600::OpName::src1, R600::OpName::src1_sel}, +    {R600::OpName::src2, R600::OpName::src2_sel}, +    {R600::OpName::src0_X, R600::OpName::src0_sel_X}, +    {R600::OpName::src0_Y, R600::OpName::src0_sel_Y}, +    {R600::OpName::src0_Z, R600::OpName::src0_sel_Z}, +    {R600::OpName::src0_W, R600::OpName::src0_sel_W}, +    {R600::OpName::src1_X, R600::OpName::src1_sel_X}, +    {R600::OpName::src1_Y, R600::OpName::src1_sel_Y}, +    {R600::OpName::src1_Z, R600::OpName::src1_sel_Z}, +    {R600::OpName::src1_W, R600::OpName::src1_sel_W}    };    for (const auto &Row : SrcSelTable) { @@ -275,23 +280,23 @@ SmallVector<std::pair<MachineOperand *, int64_t>, 3>  R600InstrInfo::getSrcs(MachineInstr &MI) const {    SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result; -  if (MI.getOpcode() == AMDGPU::DOT_4) { +  if (MI.getOpcode() == R600::DOT_4) {      static const unsigned OpTable[8][2] = { -      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, -      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, -      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, -      {AMDGPU::OpName::src0_W, 
AMDGPU::OpName::src0_sel_W}, -      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, -      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, -      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, -      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}, +      {R600::OpName::src0_X, R600::OpName::src0_sel_X}, +      {R600::OpName::src0_Y, R600::OpName::src0_sel_Y}, +      {R600::OpName::src0_Z, R600::OpName::src0_sel_Z}, +      {R600::OpName::src0_W, R600::OpName::src0_sel_W}, +      {R600::OpName::src1_X, R600::OpName::src1_sel_X}, +      {R600::OpName::src1_Y, R600::OpName::src1_sel_Y}, +      {R600::OpName::src1_Z, R600::OpName::src1_sel_Z}, +      {R600::OpName::src1_W, R600::OpName::src1_sel_W},      };      for (unsigned j = 0; j < 8; j++) {        MachineOperand &MO =            MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0]));        unsigned Reg = MO.getReg(); -      if (Reg == AMDGPU::ALU_CONST) { +      if (Reg == R600::ALU_CONST) {          MachineOperand &Sel =              MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));          Result.push_back(std::make_pair(&MO, Sel.getImm())); @@ -303,9 +308,9 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const {    }    static const unsigned OpTable[3][2] = { -    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, -    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, -    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, +    {R600::OpName::src0, R600::OpName::src0_sel}, +    {R600::OpName::src1, R600::OpName::src1_sel}, +    {R600::OpName::src2, R600::OpName::src2_sel},    };    for (unsigned j = 0; j < 3; j++) { @@ -314,15 +319,15 @@ R600InstrInfo::getSrcs(MachineInstr &MI) const {        break;      MachineOperand &MO = MI.getOperand(SrcIdx);      unsigned Reg = MO.getReg(); -    if (Reg == AMDGPU::ALU_CONST) { +    if (Reg == R600::ALU_CONST) {        MachineOperand &Sel =            MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));        Result.push_back(std::make_pair(&MO, Sel.getImm()));        continue;      } -    if (Reg == AMDGPU::ALU_LITERAL_X) { +    if (Reg == R600::ALU_LITERAL_X) {        MachineOperand &Operand = -          MI.getOperand(getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal)); +          MI.getOperand(getOperandIdx(MI.getOpcode(), R600::OpName::literal));        if (Operand.isImm()) {          Result.push_back(std::make_pair(&MO, Operand.getImm()));          continue; @@ -346,7 +351,7 @@ R600InstrInfo::ExtractSrcs(MachineInstr &MI,      ++i;      unsigned Reg = Src.first->getReg();      int Index = RI.getEncodingValue(Reg) & 0xff; -    if (Reg == AMDGPU::OQAP) { +    if (Reg == R600::OQAP) {        Result.push_back(std::make_pair(Index, 0U));      }      if (PV.find(Reg) != PV.end()) { @@ -436,7 +441,7 @@ unsigned  R600InstrInfo::isLegalUpTo(        const std::pair<int, unsigned> &Src = Srcs[j];        if (Src.first < 0 || Src.first == 255)          continue; -      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) { +      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(R600::OQAP))) {          if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&              Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {              // The value from output queue A (denoted by register OQAP) can @@ -542,7 +547,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,    for (unsigned i = 0, e = IG.size(); i < e; ++i) {      IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount));      unsigned Op = 
getOperandIdx(IG[i]->getOpcode(), -        AMDGPU::OpName::bank_swizzle); +        R600::OpName::bank_swizzle);      ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)          IG[i]->getOperand(Op).getImm());    } @@ -611,14 +616,14 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)        continue;      for (const auto &Src : getSrcs(MI)) { -      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X) +      if (Src.first->getReg() == R600::ALU_LITERAL_X)          Literals.insert(Src.second);        if (Literals.size() > 4)          return false; -      if (Src.first->getReg() == AMDGPU::ALU_CONST) +      if (Src.first->getReg() == R600::ALU_CONST)          Consts.push_back(Src.second); -      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) || -          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) { +      if (R600::R600_KC0RegClass.contains(Src.first->getReg()) || +          R600::R600_KC1RegClass.contains(Src.first->getReg())) {          unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;          unsigned Chan = RI.getHWRegChan(Src.first->getReg());          Consts.push_back((Index << 2) | Chan); @@ -637,7 +642,7 @@ R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {  static bool  isPredicateSetter(unsigned Opcode) {    switch (Opcode) { -  case AMDGPU::PRED_X: +  case R600::PRED_X:      return true;    default:      return false; @@ -659,12 +664,12 @@ findFirstPredicateSetterFrom(MachineBasicBlock &MBB,  static  bool isJump(unsigned Opcode) { -  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND; +  return Opcode == R600::JUMP || Opcode == R600::JUMP_COND;  }  static bool isBranch(unsigned Opcode) { -  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 || -      Opcode == AMDGPU::BRANCH_COND_f32; +  return Opcode == R600::BRANCH || Opcode == R600::BRANCH_COND_i32 || +      Opcode == R600::BRANCH_COND_f32;  }  bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB, @@ -679,7 +684,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,    if (I == MBB.end())      return false; -  // AMDGPU::BRANCH* instructions are only available after isel and are not +  // R600::BRANCH* instructions are only available after isel and are not    // handled    if (isBranch(I->getOpcode()))      return true; @@ -688,7 +693,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,    }    // Remove successive JUMP -  while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) { +  while (I != MBB.begin() && std::prev(I)->getOpcode() == R600::JUMP) {        MachineBasicBlock::iterator PriorI = std::prev(I);        if (AllowModify)          I->removeFromParent(); @@ -699,10 +704,10 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,    // If there is only one terminator instruction, process it.    
unsigned LastOpc = LastInst.getOpcode();    if (I == MBB.begin() || !isJump((--I)->getOpcode())) { -    if (LastOpc == AMDGPU::JUMP) { +    if (LastOpc == R600::JUMP) {        TBB = LastInst.getOperand(0).getMBB();        return false; -    } else if (LastOpc == AMDGPU::JUMP_COND) { +    } else if (LastOpc == R600::JUMP_COND) {        auto predSet = I;        while (!isPredicateSetter(predSet->getOpcode())) {          predSet = --I; @@ -710,7 +715,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,        TBB = LastInst.getOperand(0).getMBB();        Cond.push_back(predSet->getOperand(1));        Cond.push_back(predSet->getOperand(2)); -      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); +      Cond.push_back(MachineOperand::CreateReg(R600::PRED_SEL_ONE, false));        return false;      }      return true;  // Can't handle indirect branch. @@ -721,7 +726,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,    unsigned SecondLastOpc = SecondLastInst.getOpcode();    // If the block ends with a B and a Bcc, handle it. -  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) { +  if (SecondLastOpc == R600::JUMP_COND && LastOpc == R600::JUMP) {      auto predSet = --I;      while (!isPredicateSetter(predSet->getOpcode())) {        predSet = --I; @@ -730,7 +735,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,      FBB = LastInst.getOperand(0).getMBB();      Cond.push_back(predSet->getOperand(1));      Cond.push_back(predSet->getOperand(2)); -    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); +    Cond.push_back(MachineOperand::CreateReg(R600::PRED_SEL_ONE, false));      return false;    } @@ -742,8 +747,8 @@ static  MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {    for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();        It != E; ++It) { -    if (It->getOpcode() == AMDGPU::CF_ALU || -        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) +    if (It->getOpcode() == R600::CF_ALU || +        It->getOpcode() == R600::CF_ALU_PUSH_BEFORE)        return It.getReverse();    }    return MBB.end(); @@ -760,7 +765,7 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,    if (!FBB) {      if (Cond.empty()) { -      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB); +      BuildMI(&MBB, DL, get(R600::JUMP)).addMBB(TBB);        return 1;      } else {        MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); @@ -768,14 +773,14 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,        addFlag(*PredSet, 0, MO_FLAG_PUSH);        PredSet->getOperand(2).setImm(Cond[1].getImm()); -      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) +      BuildMI(&MBB, DL, get(R600::JUMP_COND))               .addMBB(TBB) -             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); +             .addReg(R600::PREDICATE_BIT, RegState::Kill);        MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);        if (CfAlu == MBB.end())          return 1; -      assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); -      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); +      assert (CfAlu->getOpcode() == R600::CF_ALU); +      CfAlu->setDesc(get(R600::CF_ALU_PUSH_BEFORE));        return 1;      }    } else { @@ -783,15 +788,15 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,      assert(PredSet && "No previous predicate !");      addFlag(*PredSet, 0, MO_FLAG_PUSH);      PredSet->getOperand(2).setImm(Cond[1].getImm()); -    
BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) +    BuildMI(&MBB, DL, get(R600::JUMP_COND))              .addMBB(TBB) -            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); -    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB); +            .addReg(R600::PREDICATE_BIT, RegState::Kill); +    BuildMI(&MBB, DL, get(R600::JUMP)).addMBB(FBB);      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);      if (CfAlu == MBB.end())        return 2; -    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); -    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); +    assert (CfAlu->getOpcode() == R600::CF_ALU); +    CfAlu->setDesc(get(R600::CF_ALU_PUSH_BEFORE));      return 2;    }  } @@ -812,18 +817,18 @@ unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,    switch (I->getOpcode()) {    default:      return 0; -  case AMDGPU::JUMP_COND: { +  case R600::JUMP_COND: {      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);      clearFlag(*predSet, 0, MO_FLAG_PUSH);      I->eraseFromParent();      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);      if (CfAlu == MBB.end())        break; -    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); -    CfAlu->setDesc(get(AMDGPU::CF_ALU)); +    assert (CfAlu->getOpcode() == R600::CF_ALU_PUSH_BEFORE); +    CfAlu->setDesc(get(R600::CF_ALU));      break;    } -  case AMDGPU::JUMP: +  case R600::JUMP:      I->eraseFromParent();      break;    } @@ -837,18 +842,18 @@ unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,      // FIXME: only one case??    default:      return 1; -  case AMDGPU::JUMP_COND: { +  case R600::JUMP_COND: {      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);      clearFlag(*predSet, 0, MO_FLAG_PUSH);      I->eraseFromParent();      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);      if (CfAlu == MBB.end())        break; -    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); -    CfAlu->setDesc(get(AMDGPU::CF_ALU)); +    assert (CfAlu->getOpcode() == R600::CF_ALU_PUSH_BEFORE); +    CfAlu->setDesc(get(R600::CF_ALU));      break;    } -  case AMDGPU::JUMP: +  case R600::JUMP:      I->eraseFromParent();      break;    } @@ -863,9 +868,9 @@ bool R600InstrInfo::isPredicated(const MachineInstr &MI) const {    unsigned Reg = MI.getOperand(idx).getReg();    switch (Reg) {    default: return false; -  case AMDGPU::PRED_SEL_ONE: -  case AMDGPU::PRED_SEL_ZERO: -  case AMDGPU::PREDICATE_BIT: +  case R600::PRED_SEL_ONE: +  case R600::PRED_SEL_ZERO: +  case R600::PREDICATE_BIT:      return true;    }  } @@ -876,9 +881,9 @@ bool R600InstrInfo::isPredicable(const MachineInstr &MI) const {    // be predicated.  Until we have proper support for instruction clauses in the    // backend, we will mark KILL* instructions as unpredicable. -  if (MI.getOpcode() == AMDGPU::KILLGT) { +  if (MI.getOpcode() == R600::KILLGT) {      return false; -  } else if (MI.getOpcode() == AMDGPU::CF_ALU) { +  } else if (MI.getOpcode() == R600::CF_ALU) {      // If the clause start in the middle of MBB then the MBB has more      // than a single clause, unable to predicate several clauses.      
if (MI.getParent()->begin() != MachineBasicBlock::const_iterator(MI)) @@ -888,7 +893,7 @@ bool R600InstrInfo::isPredicable(const MachineInstr &MI) const {    } else if (isVector(MI)) {      return false;    } else { -    return AMDGPUInstrInfo::isPredicable(MI); +    return TargetInstrInfo::isPredicable(MI);    }  } @@ -929,17 +934,17 @@ bool  R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {    MachineOperand &MO = Cond[1];    switch (MO.getImm()) { -  case AMDGPU::PRED_SETE_INT: -    MO.setImm(AMDGPU::PRED_SETNE_INT); +  case R600::PRED_SETE_INT: +    MO.setImm(R600::PRED_SETNE_INT);      break; -  case AMDGPU::PRED_SETNE_INT: -    MO.setImm(AMDGPU::PRED_SETE_INT); +  case R600::PRED_SETNE_INT: +    MO.setImm(R600::PRED_SETE_INT);      break; -  case AMDGPU::PRED_SETE: -    MO.setImm(AMDGPU::PRED_SETNE); +  case R600::PRED_SETE: +    MO.setImm(R600::PRED_SETNE);      break; -  case AMDGPU::PRED_SETNE: -    MO.setImm(AMDGPU::PRED_SETE); +  case R600::PRED_SETNE: +    MO.setImm(R600::PRED_SETE);      break;    default:      return true; @@ -947,11 +952,11 @@ R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) con    MachineOperand &MO2 = Cond[2];    switch (MO2.getReg()) { -  case AMDGPU::PRED_SEL_ZERO: -    MO2.setReg(AMDGPU::PRED_SEL_ONE); +  case R600::PRED_SEL_ZERO: +    MO2.setReg(R600::PRED_SEL_ONE);      break; -  case AMDGPU::PRED_SEL_ONE: -    MO2.setReg(AMDGPU::PRED_SEL_ZERO); +  case R600::PRED_SEL_ONE: +    MO2.setReg(R600::PRED_SEL_ZERO);      break;    default:      return true; @@ -968,22 +973,22 @@ bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,                                           ArrayRef<MachineOperand> Pred) const {    int PIdx = MI.findFirstPredOperandIdx(); -  if (MI.getOpcode() == AMDGPU::CF_ALU) { +  if (MI.getOpcode() == R600::CF_ALU) {      MI.getOperand(8).setImm(0);      return true;    } -  if (MI.getOpcode() == AMDGPU::DOT_4) { -    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X)) +  if (MI.getOpcode() == R600::DOT_4) { +    MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_X))          .setReg(Pred[2].getReg()); -    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y)) +    MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_Y))          .setReg(Pred[2].getReg()); -    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z)) +    MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_Z))          .setReg(Pred[2].getReg()); -    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W)) +    MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_W))          .setReg(Pred[2].getReg());      MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); -    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); +    MIB.addReg(R600::PREDICATE_BIT, RegState::Implicit);      return true;    } @@ -991,7 +996,7 @@ bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,      MachineOperand &PMO = MI.getOperand(PIdx);      PMO.setReg(Pred[2].getReg());      MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); -    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); +    MIB.addReg(R600::PREDICATE_BIT, RegState::Implicit);      return true;    } @@ -1021,20 +1026,20 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {    default: {      MachineBasicBlock *MBB = MI.getParent();      int OffsetOpIdx = -        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr); +        R600::getNamedOperandIdx(MI.getOpcode(), 
R600::OpName::addr);      // addr is a custom operand with multiple MI operands, and only the      // first MI operand is given a name.      int RegOpIdx = OffsetOpIdx + 1;      int ChanOpIdx = -        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan); +        R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::chan);      if (isRegisterLoad(MI)) {        int DstOpIdx = -          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); +          R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::dst);        unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();        unsigned Channel = MI.getOperand(ChanOpIdx).getImm();        unsigned Address = calculateIndirectAddress(RegIndex, Channel);        unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); -      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { +      if (OffsetReg == R600::INDIRECT_BASE_ADDR) {          buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(),                        getIndirectAddrRegClass()->getRegister(Address));        } else { @@ -1043,12 +1048,12 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {        }      } else if (isRegisterStore(MI)) {        int ValOpIdx = -          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val); +          R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::val);        unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();        unsigned Channel = MI.getOperand(ChanOpIdx).getImm();        unsigned Address = calculateIndirectAddress(RegIndex, Channel);        unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); -      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) { +      if (OffsetReg == R600::INDIRECT_BASE_ADDR) {          buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),                        MI.getOperand(ValOpIdx).getReg());        } else { @@ -1063,15 +1068,15 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {      MBB->erase(MI);      return true;    } -  case AMDGPU::R600_EXTRACT_ELT_V2: -  case AMDGPU::R600_EXTRACT_ELT_V4: +  case R600::R600_EXTRACT_ELT_V2: +  case R600::R600_EXTRACT_ELT_V4:      buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(),                        RI.getHWRegIndex(MI.getOperand(1).getReg()), //  Address                        MI.getOperand(2).getReg(),                        RI.getHWRegChan(MI.getOperand(1).getReg()));      break; -  case AMDGPU::R600_INSERT_ELT_V2: -  case AMDGPU::R600_INSERT_ELT_V4: +  case R600::R600_INSERT_ELT_V2: +  case R600::R600_INSERT_ELT_V4:      buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value                         RI.getHWRegIndex(MI.getOperand(1).getReg()),   // Address                         MI.getOperand(3).getReg(),                     // Offset @@ -1096,14 +1101,14 @@ void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,    for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {      for (unsigned Chan = 0; Chan < StackWidth; ++Chan) { -      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan); +      unsigned Reg = R600::R600_TReg32RegClass.getRegister((4 * Index) + Chan);        TRI.reserveRegisterTuples(Reserved, Reg);      }    }  }  const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const { -  return &AMDGPU::R600_TReg32_XRegClass; +  return &R600::R600_TReg32_XRegClass;  }  MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, @@ -1121,20 +1126,20 @@ 
MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,    unsigned AddrReg;    switch (AddrChan) {      default: llvm_unreachable("Invalid Channel"); -    case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break; -    case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break; -    case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break; -    case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break; +    case 0: AddrReg = R600::R600_AddrRegClass.getRegister(Address); break; +    case 1: AddrReg = R600::R600_Addr_YRegClass.getRegister(Address); break; +    case 2: AddrReg = R600::R600_Addr_ZRegClass.getRegister(Address); break; +    case 3: AddrReg = R600::R600_Addr_WRegClass.getRegister(Address); break;    } -  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, -                                               AMDGPU::AR_X, OffsetReg); -  setImmOperand(*MOVA, AMDGPU::OpName::write, 0); +  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, R600::MOVA_INT_eg, +                                               R600::AR_X, OffsetReg); +  setImmOperand(*MOVA, R600::OpName::write, 0); -  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, +  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, R600::MOV,                                        AddrReg, ValueReg) -                                      .addReg(AMDGPU::AR_X, +                                      .addReg(R600::AR_X,                                             RegState::Implicit | RegState::Kill); -  setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1); +  setImmOperand(*Mov, R600::OpName::dst_rel, 1);    return Mov;  } @@ -1153,21 +1158,21 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,    unsigned AddrReg;    switch (AddrChan) {      default: llvm_unreachable("Invalid Channel"); -    case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break; -    case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break; -    case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break; -    case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break; +    case 0: AddrReg = R600::R600_AddrRegClass.getRegister(Address); break; +    case 1: AddrReg = R600::R600_Addr_YRegClass.getRegister(Address); break; +    case 2: AddrReg = R600::R600_Addr_ZRegClass.getRegister(Address); break; +    case 3: AddrReg = R600::R600_Addr_WRegClass.getRegister(Address); break;    } -  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, -                                                       AMDGPU::AR_X, +  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, R600::MOVA_INT_eg, +                                                       R600::AR_X,                                                         OffsetReg); -  setImmOperand(*MOVA, AMDGPU::OpName::write, 0); -  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, +  setImmOperand(*MOVA, R600::OpName::write, 0); +  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, R600::MOV,                                        ValueReg,                                        AddrReg) -                                      .addReg(AMDGPU::AR_X, +                                      .addReg(R600::AR_X,                                             RegState::Implicit | RegState::Kill); -  setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1); +  setImmOperand(*Mov, 
R600::OpName::src0_rel, 1);    return Mov;  } @@ -1265,7 +1270,7 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB    //XXX: The r600g finalizer expects this to be 1, once we've moved the    //scheduling to the backend, we can change the default to 0.    MIB.addImm(1)        // $last -      .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel +      .addReg(R600::PRED_SEL_OFF) // $pred_sel        .addImm(0)         // $literal        .addImm(0);        // $bank_swizzle @@ -1286,23 +1291,23 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB  static unsigned getSlotedOps(unsigned  Op, unsigned Slot) {    switch (Op) { -  OPERAND_CASE(AMDGPU::OpName::update_exec_mask) -  OPERAND_CASE(AMDGPU::OpName::update_pred) -  OPERAND_CASE(AMDGPU::OpName::write) -  OPERAND_CASE(AMDGPU::OpName::omod) -  OPERAND_CASE(AMDGPU::OpName::dst_rel) -  OPERAND_CASE(AMDGPU::OpName::clamp) -  OPERAND_CASE(AMDGPU::OpName::src0) -  OPERAND_CASE(AMDGPU::OpName::src0_neg) -  OPERAND_CASE(AMDGPU::OpName::src0_rel) -  OPERAND_CASE(AMDGPU::OpName::src0_abs) -  OPERAND_CASE(AMDGPU::OpName::src0_sel) -  OPERAND_CASE(AMDGPU::OpName::src1) -  OPERAND_CASE(AMDGPU::OpName::src1_neg) -  OPERAND_CASE(AMDGPU::OpName::src1_rel) -  OPERAND_CASE(AMDGPU::OpName::src1_abs) -  OPERAND_CASE(AMDGPU::OpName::src1_sel) -  OPERAND_CASE(AMDGPU::OpName::pred_sel) +  OPERAND_CASE(R600::OpName::update_exec_mask) +  OPERAND_CASE(R600::OpName::update_pred) +  OPERAND_CASE(R600::OpName::write) +  OPERAND_CASE(R600::OpName::omod) +  OPERAND_CASE(R600::OpName::dst_rel) +  OPERAND_CASE(R600::OpName::clamp) +  OPERAND_CASE(R600::OpName::src0) +  OPERAND_CASE(R600::OpName::src0_neg) +  OPERAND_CASE(R600::OpName::src0_rel) +  OPERAND_CASE(R600::OpName::src0_abs) +  OPERAND_CASE(R600::OpName::src0_sel) +  OPERAND_CASE(R600::OpName::src1) +  OPERAND_CASE(R600::OpName::src1_neg) +  OPERAND_CASE(R600::OpName::src1_rel) +  OPERAND_CASE(R600::OpName::src1_abs) +  OPERAND_CASE(R600::OpName::src1_sel) +  OPERAND_CASE(R600::OpName::pred_sel)    default:      llvm_unreachable("Wrong Operand");    } @@ -1313,39 +1318,39 @@ static unsigned getSlotedOps(unsigned  Op, unsigned Slot) {  MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(      MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)      const { -  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented"); +  assert (MI->getOpcode() == R600::DOT_4 && "Not Implemented");    unsigned Opcode;    if (ST.getGeneration() <= R600Subtarget::R700) -    Opcode = AMDGPU::DOT4_r600; +    Opcode = R600::DOT4_r600;    else -    Opcode = AMDGPU::DOT4_eg; +    Opcode = R600::DOT4_eg;    MachineBasicBlock::iterator I = MI;    MachineOperand &Src0 = MI->getOperand( -      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot))); +      getOperandIdx(MI->getOpcode(), getSlotedOps(R600::OpName::src0, Slot)));    MachineOperand &Src1 = MI->getOperand( -      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot))); +      getOperandIdx(MI->getOpcode(), getSlotedOps(R600::OpName::src1, Slot)));    MachineInstr *MIB = buildDefaultInstruction(        MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());    static const unsigned  Operands[14] = { -    AMDGPU::OpName::update_exec_mask, -    AMDGPU::OpName::update_pred, -    AMDGPU::OpName::write, -    AMDGPU::OpName::omod, -    AMDGPU::OpName::dst_rel, -    AMDGPU::OpName::clamp, -    AMDGPU::OpName::src0_neg, -    AMDGPU::OpName::src0_rel, -    
AMDGPU::OpName::src0_abs, -    AMDGPU::OpName::src0_sel, -    AMDGPU::OpName::src1_neg, -    AMDGPU::OpName::src1_rel, -    AMDGPU::OpName::src1_abs, -    AMDGPU::OpName::src1_sel, +    R600::OpName::update_exec_mask, +    R600::OpName::update_pred, +    R600::OpName::write, +    R600::OpName::omod, +    R600::OpName::dst_rel, +    R600::OpName::clamp, +    R600::OpName::src0_neg, +    R600::OpName::src0_rel, +    R600::OpName::src0_abs, +    R600::OpName::src0_sel, +    R600::OpName::src1_neg, +    R600::OpName::src1_rel, +    R600::OpName::src1_abs, +    R600::OpName::src1_sel,    };    MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(), -      getSlotedOps(AMDGPU::OpName::pred_sel, Slot))); -  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel)) +      getSlotedOps(R600::OpName::pred_sel, Slot))); +  MIB->getOperand(getOperandIdx(Opcode, R600::OpName::pred_sel))        .setReg(MO.getReg());    for (unsigned i = 0; i < 14; i++) { @@ -1362,16 +1367,16 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,                                           MachineBasicBlock::iterator I,                                           unsigned DstReg,                                           uint64_t Imm) const { -  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg, -                                                  AMDGPU::ALU_LITERAL_X); -  setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm); +  MachineInstr *MovImm = buildDefaultInstruction(BB, I, R600::MOV, DstReg, +                                                  R600::ALU_LITERAL_X); +  setImmOperand(*MovImm, R600::OpName::literal, Imm);    return MovImm;  }  MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,                                         MachineBasicBlock::iterator I,                                         unsigned DstReg, unsigned SrcReg) const { -  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg); +  return buildDefaultInstruction(*MBB, I, R600::MOV, DstReg, SrcReg);  }  int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const { @@ -1379,7 +1384,7 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {  }  int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const { -  return AMDGPU::getNamedOperandIdx(Opcode, Op); +  return R600::getNamedOperandIdx(Opcode, Op);  }  void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op, @@ -1406,25 +1411,25 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx,      bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;      switch (Flag) {      case MO_FLAG_CLAMP: -      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp); +      FlagIndex = getOperandIdx(MI, R600::OpName::clamp);        break;      case MO_FLAG_MASK: -      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write); +      FlagIndex = getOperandIdx(MI, R600::OpName::write);        break;      case MO_FLAG_NOT_LAST:      case MO_FLAG_LAST: -      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last); +      FlagIndex = getOperandIdx(MI, R600::OpName::last);        break;      case MO_FLAG_NEG:        switch (SrcIdx) {        case 0: -        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg); +        FlagIndex = getOperandIdx(MI, R600::OpName::src0_neg);          break;        case 1: -        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg); +        FlagIndex = getOperandIdx(MI, R600::OpName::src1_neg);          break;       
 case 2: -        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg); +        FlagIndex = getOperandIdx(MI, R600::OpName::src2_neg);          break;        }        break; @@ -1435,10 +1440,10 @@ MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx,        (void)IsOP3;        switch (SrcIdx) {        case 0: -        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs); +        FlagIndex = getOperandIdx(MI, R600::OpName::src0_abs);          break;        case 1: -        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs); +        FlagIndex = getOperandIdx(MI, R600::OpName::src1_abs);          break;        }        break; @@ -1499,15 +1504,15 @@ unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind(    switch (Kind) {    case PseudoSourceValue::Stack:    case PseudoSourceValue::FixedStack: -    return AMDGPUASI.PRIVATE_ADDRESS; +    return ST.getAMDGPUAS().PRIVATE_ADDRESS;    case PseudoSourceValue::ConstantPool:    case PseudoSourceValue::GOT:    case PseudoSourceValue::JumpTable:    case PseudoSourceValue::GlobalValueCallEntry:    case PseudoSourceValue::ExternalSymbolCallEntry:    case PseudoSourceValue::TargetCustom: -    return AMDGPUASI.CONSTANT_ADDRESS; +    return ST.getAMDGPUAS().CONSTANT_ADDRESS;    }    llvm_unreachable("Invalid pseudo source kind"); -  return AMDGPUASI.PRIVATE_ADDRESS; +  return ST.getAMDGPUAS().PRIVATE_ADDRESS;  } diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h index 0af17d01c94..7a3dece3166 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h @@ -15,8 +15,11 @@  #ifndef LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H  #define LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H -#include "AMDGPUInstrInfo.h"  #include "R600RegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "R600GenInstrInfo.inc"  namespace llvm { @@ -34,7 +37,7 @@ class MachineInstr;  class MachineInstrBuilder;  class R600Subtarget; -class R600InstrInfo final : public AMDGPUInstrInfo { +class R600InstrInfo final : public R600GenInstrInfo {  private:    const R600RegisterInfo RI;    const R600Subtarget &ST; @@ -324,7 +327,7 @@ public:        PseudoSourceValue::PSVKind Kind) const override;  }; -namespace AMDGPU { +namespace R600 {  int getLDSNoRetOp(uint16_t Opcode); diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index b4b4a5ca3f9..7bf174f4cd8 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -18,13 +18,13 @@ include "R600InstrFormats.td"  class R600WrapperInst <dag outs, dag ins, string asm = "", list<dag> pattern = []> :    AMDGPUInst<outs, ins, asm, pattern>, PredicateControl {    let SubtargetPredicate = isR600toCayman; +  let Namespace = "R600";  }  class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern = []> :      InstR600 <outs, ins, asm, pattern, NullALU> { -  let Namespace = "AMDGPU";  }  def MEMxi : Operand<iPTR> { @@ -86,6 +86,12 @@ def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;  def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),                                       (ops PRED_SEL_OFF)>; +let isTerminator = 1, isReturn = 1, hasCtrlDep = 1, +    usesCustomInserter = 1, Namespace = "R600" in { +  def RETURN : ILFormat<(outs), (ins variable_ops), +    "RETURN", [(AMDGPUendpgm)] +  >; +}  let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { @@ -219,34 +225,6 @@ class 
R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,  } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0 -def TEX_SHADOW : PatLeaf< -  (imm), -  [{uint32_t TType = (uint32_t)N->getZExtValue(); -    return (TType >= 6 && TType <= 8) || TType == 13; -  }] ->; - -def TEX_RECT : PatLeaf< -  (imm), -  [{uint32_t TType = (uint32_t)N->getZExtValue(); -    return TType == 5; -  }] ->; - -def TEX_ARRAY : PatLeaf< -  (imm), -  [{uint32_t TType = (uint32_t)N->getZExtValue(); -    return TType == 9 || TType == 10 || TType == 16; -  }] ->; - -def TEX_SHADOW_ARRAY : PatLeaf< -  (imm), -  [{uint32_t TType = (uint32_t)N->getZExtValue(); -    return TType == 11 || TType == 12 || TType == 17; -  }] ->; -  class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,                   dag outs, dag ins, string asm, list<dag> pattern> :      InstR600ISA <outs, ins, asm, pattern>, @@ -357,6 +335,8 @@ def vtx_id2_load : LoadVtxId2 <load>;  // R600 SDNodes  //===----------------------------------------------------------------------===// +let Namespace = "R600" in { +  def INTERP_PAIR_XY :  AMDGPUShaderInst <    (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),    (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2), @@ -369,6 +349,8 @@ def INTERP_PAIR_ZW :  AMDGPUShaderInst <    "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1",    []>; +} +  def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",    SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,    [SDNPVariadic] @@ -416,11 +398,15 @@ def : R600Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR,  // Interpolation Instructions  //===----------------------------------------------------------------------===// +let Namespace = "R600" in { +  def INTERP_VEC_LOAD :  AMDGPUShaderInst <    (outs R600_Reg128:$dst),    (ins i32imm:$src0),    "INTERP_LOAD $src0 : $dst">; +} +  def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {    let bank_swizzle = 5;  } @@ -660,7 +646,7 @@ def PAD : R600WrapperInst <(outs), (ins), "PAD", [] > {  let isCodeGenOnly = 1, isPseudo = 1 in { -let usesCustomInserter = 1  in { +let Namespace = "R600", usesCustomInserter = 1  in {  class FABS <RegisterClass rc> : AMDGPUShaderInst <    (outs rc:$dst), @@ -792,7 +778,9 @@ class MOV_IMM <ValueType vt, Operand immType> : R600WrapperInst <    (ins immType:$imm),    "",    [] ->; +> { +  let Namespace = "R600"; +}  } // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 @@ -1007,7 +995,7 @@ class CNDGE_Common <bits<5> inst> : R600_3OP <  } -let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"  in { +let isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600"  in {  class R600_VEC2OP<list<dag> pattern> : InstR600 <(outs R600_Reg32:$dst), (ins  // Slot X     UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X, @@ -1326,7 +1314,9 @@ let Predicates = [isR600] in {  // Regist loads and stores - for indirect addressing  //===----------------------------------------------------------------------===// +let Namespace = "R600" in {  defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>; +}  // Hardcode channel to 0  // NOTE: LSHR is not available here. 
LSHR is per family instruction @@ -1378,11 +1368,12 @@ let usesCustomInserter = 1 in {  let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in { -def MASK_WRITE : AMDGPUShaderInst < +def MASK_WRITE : InstR600 <      (outs),      (ins R600_Reg32:$src),      "MASK_WRITE $src", -    [] +    [], +    NullALU  >;  } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1 @@ -1413,7 +1404,7 @@ def TXD_SHADOW: InstR600 <  // Constant Buffer Addressing Support  //===----------------------------------------------------------------------===// -let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"  in { +let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600"  in {  def CONST_COPY : Instruction {    let OutOperandList = (outs R600_Reg32:$dst);    let InOperandList = (ins i32imm:$src); @@ -1536,23 +1527,6 @@ let Inst{63-32} = Word1;  //===---------------------------------------------------------------------===//  // Flow and Program control Instructions  //===---------------------------------------------------------------------===// -class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern> -: Instruction { - -     let Namespace = "AMDGPU"; -     dag OutOperandList = outs; -     dag InOperandList = ins; -     let Pattern = pattern; -     let AsmString = !strconcat(asmstr, "\n"); -     let isPseudo = 1; -     let Itinerary = NullALU; -     bit hasIEEEFlag = 0; -     bit hasZeroOpFlag = 0; -     let mayLoad = 0; -     let mayStore = 0; -     let hasSideEffects = 0; -     let isCodeGenOnly = 1; -}  multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {      def _i32 : ILFormat<(outs), @@ -1584,23 +1558,14 @@ multiclass BranchInstr2<string name> {  // Custom Inserter for Branches and returns, this eventually will be a  // separate pass  //===---------------------------------------------------------------------===// -let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { +let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1, +    Namespace = "R600" in {    def BRANCH : ILFormat<(outs), (ins brtarget:$target),        "; Pseudo unconditional branch instruction",        [(br bb:$target)]>;    defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>;  } -//===---------------------------------------------------------------------===// -// Return instruction -//===---------------------------------------------------------------------===// -let isTerminator = 1, isReturn = 1, hasCtrlDep = 1, -    usesCustomInserter = 1 in { -  def RETURN : ILFormat<(outs), (ins variable_ops), -    "RETURN", [(AMDGPUendpgm)] -  >; -} -  //===----------------------------------------------------------------------===//  // Branch Instructions  //===----------------------------------------------------------------------===// @@ -1731,7 +1696,7 @@ def : R600Pat <  // KIL Patterns  def KIL : R600Pat < -  (int_AMDGPU_kill f32:$src0), +  (int_r600_kill f32:$src0),    (MASK_WRITE (KILLGT (f32 ZERO), $src0))  >; diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp index dbe83bcc4fc..afded915982 100644 --- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -162,7 +162,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {        for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),            E = SU->getInstr()->operands_end(); It != E; ++It) {          
MachineOperand &MO = *It; -        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) +        if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)            ++CurEmitted;        }      } @@ -181,7 +181,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {  static bool  isPhysicalRegCopy(MachineInstr *MI) { -  if (MI->getOpcode() != AMDGPU::COPY) +  if (MI->getOpcode() != R600::COPY)      return false;    return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()); @@ -224,14 +224,14 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {      return AluTrans;    switch (MI->getOpcode()) { -  case AMDGPU::PRED_X: +  case R600::PRED_X:      return AluPredX; -  case AMDGPU::INTERP_PAIR_XY: -  case AMDGPU::INTERP_PAIR_ZW: -  case AMDGPU::INTERP_VEC_LOAD: -  case AMDGPU::DOT_4: +  case R600::INTERP_PAIR_XY: +  case R600::INTERP_PAIR_ZW: +  case R600::INTERP_VEC_LOAD: +  case R600::DOT_4:      return AluT_XYZW; -  case AMDGPU::COPY: +  case R600::COPY:      if (MI->getOperand(1).isUndef()) {        // MI will become a KILL, don't considers it in scheduling        return AluDiscarded; @@ -246,7 +246,7 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {    if(TII->isVector(*MI) ||       TII->isCubeOp(MI->getOpcode()) ||       TII->isReductionOp(MI->getOpcode()) || -     MI->getOpcode() == AMDGPU::GROUP_BARRIER) { +     MI->getOpcode() == R600::GROUP_BARRIER) {      return AluT_XYZW;    } @@ -257,13 +257,13 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {    // Is the result already assigned to a channel ?    unsigned DestSubReg = MI->getOperand(0).getSubReg();    switch (DestSubReg) { -  case AMDGPU::sub0: +  case R600::sub0:      return AluT_X; -  case AMDGPU::sub1: +  case R600::sub1:      return AluT_Y; -  case AMDGPU::sub2: +  case R600::sub2:      return AluT_Z; -  case AMDGPU::sub3: +  case R600::sub3:      return AluT_W;    default:      break; @@ -271,16 +271,16 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {    // Is the result already member of a X/Y/Z/W class ?    unsigned DestReg = MI->getOperand(0).getReg(); -  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) || -      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass)) +  if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) || +      regBelongsToClass(DestReg, &R600::R600_AddrRegClass))      return AluT_X; -  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass)) +  if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass))      return AluT_Y; -  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass)) +  if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass))      return AluT_Z; -  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass)) +  if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass))      return AluT_W; -  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass)) +  if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass))      return AluT_XYZW;    // LDS src registers cannot be used in the Trans slot. 
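Throughout these R600 passes, named-operand lookups go through R600::getNamedOperandIdx (via the getOperandIdx wrappers seen above), which returns -1 when an opcode does not carry the named operand, so callers guard the index before using it (as AssignSlot and the packetizer below do). What follows is only a minimal, self-contained C++ sketch of that sentinel-guarded lookup; the opcode/operand-name enums and the table are invented for illustration and are not the TableGen-generated R600 tables.

    #include <cstdint>
    #include <map>
    #include <utility>

    // Stand-ins for the generated opcode and operand-name enums (illustrative only).
    enum Opcode : uint16_t { MOV = 0, DOT_4 = 1 };
    enum OpName : uint16_t { dst = 0, src0 = 1, literal = 2 };

    // Hypothetical stand-in for R600::getNamedOperandIdx: -1 means "no such operand".
    int getNamedOperandIdx(uint16_t Op, uint16_t Name) {
      static const std::map<std::pair<uint16_t, uint16_t>, int> Table = {
          {{MOV, dst}, 0}, {{MOV, src0}, 1}, {{MOV, literal}, 4},
          {{DOT_4, dst}, 0}, {{DOT_4, src0}, 1},
      };
      auto It = Table.find({Op, Name});
      return It == Table.end() ? -1 : It->second;
    }

    // Callers guard on the sentinel exactly as the hunks in this patch do.
    bool hasDst(uint16_t Op) { return getNamedOperandIdx(Op, dst) > -1; }

With this shape a pass can probe for optional operands (write, literal, bank_swizzle, and so on) without hard-coding per-opcode operand layouts.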
@@ -301,13 +301,13 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {    }    switch (Opcode) { -  case AMDGPU::PRED_X: -  case AMDGPU::COPY: -  case AMDGPU::CONST_COPY: -  case AMDGPU::INTERP_PAIR_XY: -  case AMDGPU::INTERP_PAIR_ZW: -  case AMDGPU::INTERP_VEC_LOAD: -  case AMDGPU::DOT_4: +  case R600::PRED_X: +  case R600::COPY: +  case R600::CONST_COPY: +  case R600::INTERP_PAIR_XY: +  case R600::INTERP_PAIR_ZW: +  case R600::INTERP_VEC_LOAD: +  case R600::DOT_4:      return IDAlu;    default:      return IDOther; @@ -353,7 +353,7 @@ void R600SchedStrategy::PrepareNextSlot() {  }  void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { -  int DstIndex = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst); +  int DstIndex = TII->getOperandIdx(MI->getOpcode(), R600::OpName::dst);    if (DstIndex == -1) {      return;    } @@ -370,16 +370,16 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {    // Constrains the regclass of DestReg to assign it to Slot    switch (Slot) {    case 0: -    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass); +    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_XRegClass);      break;    case 1: -    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass); +    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_YRegClass);      break;    case 2: -    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass); +    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_ZRegClass);      break;    case 3: -    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass); +    MRI->constrainRegClass(DestReg, &R600::R600_TReg32_WRegClass);      break;    }  } diff --git a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp index b6e641aa2a4..692451cb8fe 100644 --- a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -79,7 +79,7 @@ public:    std::vector<unsigned> UndefReg;    RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { -    assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE); +    assert(MI->getOpcode() == R600::REG_SEQUENCE);      for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {        MachineOperand &MO = Instr->getOperand(i);        unsigned Chan = Instr->getOperand(i + 1).getImm(); @@ -159,8 +159,8 @@ bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)    if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)      return true;    switch (MI.getOpcode()) { -  case AMDGPU::R600_ExportSwz: -  case AMDGPU::EG_ExportSwz: +  case R600::R600_ExportSwz: +  case R600::EG_ExportSwz:      return true;    default:      return false; @@ -213,12 +213,12 @@ MachineInstr *R600VectorRegMerger::RebuildVector(    std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;    for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(),        E = RSI->RegToChan.end(); It != E; ++It) { -    unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); +    unsigned DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass);      unsigned SubReg = (*It).first;      unsigned Swizzle = (*It).second;      unsigned Chan = getReassignedChan(RemapChan, Swizzle); -    MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG), +    MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG),          DstReg)          .addReg(SrcVec)          .addReg(SubReg) @@ -234,7 +234,7 @@ MachineInstr 
*R600VectorRegMerger::RebuildVector(      SrcVec = DstReg;    }    MachineInstr *NewMI = -      BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg).addReg(SrcVec); +      BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec);    LLVM_DEBUG(dbgs() << "    ->"; NewMI->dump(););    LLVM_DEBUG(dbgs() << "  Updating Swizzle:\n"); @@ -354,7 +354,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {      for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();           MII != MIIE; ++MII) {        MachineInstr &MI = *MII; -      if (MI.getOpcode() != AMDGPU::REG_SEQUENCE) { +      if (MI.getOpcode() != R600::REG_SEQUENCE) {          if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {            unsigned Reg = MI.getOperand(1).getReg();            for (MachineRegisterInfo::def_instr_iterator diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp index 541711803ef..612c62b514f 100644 --- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp @@ -84,39 +84,39 @@ private:        LastDstChan = BISlot;        if (TII->isPredicated(*BI))          continue; -      int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); +      int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write);        if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)          continue; -      int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst); +      int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst);        if (DstIdx == -1) {          continue;        }        unsigned Dst = BI->getOperand(DstIdx).getReg();        if (isTrans || TII->isTransOnly(*BI)) { -        Result[Dst] = AMDGPU::PS; +        Result[Dst] = R600::PS;          continue;        } -      if (BI->getOpcode() == AMDGPU::DOT4_r600 || -          BI->getOpcode() == AMDGPU::DOT4_eg) { -        Result[Dst] = AMDGPU::PV_X; +      if (BI->getOpcode() == R600::DOT4_r600 || +          BI->getOpcode() == R600::DOT4_eg) { +        Result[Dst] = R600::PV_X;          continue;        } -      if (Dst == AMDGPU::OQAP) { +      if (Dst == R600::OQAP) {          continue;        }        unsigned PVReg = 0;        switch (TRI.getHWRegChan(Dst)) {        case 0: -        PVReg = AMDGPU::PV_X; +        PVReg = R600::PV_X;          break;        case 1: -        PVReg = AMDGPU::PV_Y; +        PVReg = R600::PV_Y;          break;        case 2: -        PVReg = AMDGPU::PV_Z; +        PVReg = R600::PV_Z;          break;        case 3: -        PVReg = AMDGPU::PV_W; +        PVReg = R600::PV_W;          break;        default:          llvm_unreachable("Invalid Chan"); @@ -129,9 +129,9 @@ private:    void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs)        const {      unsigned Ops[] = { -      AMDGPU::OpName::src0, -      AMDGPU::OpName::src1, -      AMDGPU::OpName::src2 +      R600::OpName::src0, +      R600::OpName::src1, +      R600::OpName::src2      };      for (unsigned i = 0; i < 3; i++) {        int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]); @@ -171,7 +171,7 @@ public:        return true;      if (!TII->isALUInstr(MI.getOpcode()))        return true; -    if (MI.getOpcode() == AMDGPU::GROUP_BARRIER) +    if (MI.getOpcode() == R600::GROUP_BARRIER)        return true;      // XXX: This can be removed once the packetizer properly handles all the      // LDS instruction group restrictions. 
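For readers following the R600Packetizer.cpp hunk above that builds the Dst-to-PV map: each register written by the previous instruction group is resolved to a previous-vector read (PV_X..PV_W) keyed by the register's hardware channel, with trans-slot results forwarded through PS instead. Below is a small self-contained sketch of that channel-to-PV mapping; the enum names are placeholders for the generated R600 register enum, not the real values.

    #include <cassert>

    // Placeholders standing in for R600::PV_X .. R600::PV_W and R600::PS.
    enum PrevVectorReg { PV_X, PV_Y, PV_Z, PV_W, PS };

    // Maps a destination's hardware channel to the PV register that forwards it,
    // mirroring the switch on TRI.getHWRegChan(Dst) in the patch; trans-slot
    // results go through PS rather than a per-channel PV register.
    PrevVectorReg previousVectorFor(unsigned HWChan, bool IsTransSlot) {
      if (IsTransSlot)
        return PS;
      switch (HWChan) {
      case 0: return PV_X;
      case 1: return PV_Y;
      case 2: return PV_Z;
      case 3: return PV_W;
      default: assert(false && "Invalid Chan"); return PV_X;
      }
    }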
@@ -185,8 +185,8 @@ public:      if (getSlot(*MII) == getSlot(*MIJ))        ConsideredInstUsesAlreadyWrittenVectorElement = true;      // Does MII and MIJ share the same pred_sel ? -    int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel), -        OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel); +    int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel), +        OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);      unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,          PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;      if (PredI != PredJ) @@ -220,7 +220,7 @@ public:    }    void setIsLastBit(MachineInstr *MI, unsigned Bit) const { -    unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last); +    unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last);      MI->getOperand(LastOp).setImm(Bit);    } @@ -301,11 +301,11 @@ public:        for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {          MachineInstr *MI = CurrentPacketMIs[i];          unsigned Op = TII->getOperandIdx(MI->getOpcode(), -            AMDGPU::OpName::bank_swizzle); +            R600::OpName::bank_swizzle);          MI->getOperand(Op).setImm(BS[i]);        }        unsigned Op = -          TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::bank_swizzle); +          TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle);        MI.getOperand(Op).setImm(BS.back());        if (!CurrentPacketMIs.empty())          setIsLastBit(CurrentPacketMIs.back(), 0); @@ -334,6 +334,7 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {    // DFA state table should not be empty.    assert(Packetizer.getResourceTracker() && "Empty DFA table!"); +  assert(Packetizer.getResourceTracker()->getInstrItins());    if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())      return false; @@ -353,8 +354,8 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {      MachineBasicBlock::iterator End = MBB->end();      MachineBasicBlock::iterator MI = MBB->begin();      while (MI != End) { -      if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF || -          (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) { +      if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF || +          (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) {          MachineBasicBlock::iterator DeleteMI = MI;          ++MI;          MBB->erase(DeleteMI); diff --git a/llvm/lib/Target/AMDGPU/R600Processors.td b/llvm/lib/Target/AMDGPU/R600Processors.td index 89194dc1bdf..f39b3dc1bfd 100644 --- a/llvm/lib/Target/AMDGPU/R600Processors.td +++ b/llvm/lib/Target/AMDGPU/R600Processors.td @@ -7,6 +7,62 @@  //  //===----------------------------------------------------------------------===// +class SubtargetFeatureFetchLimit <string Value> : +                          SubtargetFeature <"fetch"#Value, +  "TexVTXClauseSize", +  Value, +  "Limit the maximum number of fetches in a clause to "#Value +>; + +def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", +  "R600ALUInst", +  "false", +  "Older version of ALU instructions encoding" +>; + +def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">; +def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">; + +def FeatureVertexCache : SubtargetFeature<"HasVertexCache", +  "HasVertexCache", +  "true", +  "Specify use of dedicated vertex cache" +>; + +def FeatureCaymanISA : 
SubtargetFeature<"caymanISA", +  "CaymanISA", +  "true", +  "Use Cayman ISA" +>; + +def FeatureCFALUBug : SubtargetFeature<"cfalubug", +  "CFALUBug", +  "true", +  "GPU has CF_ALU bug" +>; + +class R600SubtargetFeatureGeneration <string Value, +                                  list<SubtargetFeature> Implies> : +        SubtargetFeatureGeneration <Value, "R600Subtarget", Implies>; + +def FeatureR600 : R600SubtargetFeatureGeneration<"R600", +  [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0] +>; + +def FeatureR700 : R600SubtargetFeatureGeneration<"R700", +  [FeatureFetchLimit16, FeatureLocalMemorySize0] +>; + +def FeatureEvergreen : R600SubtargetFeatureGeneration<"EVERGREEN", +  [FeatureFetchLimit16, FeatureLocalMemorySize32768] +>; + +def FeatureNorthernIslands : R600SubtargetFeatureGeneration<"NORTHERN_ISLANDS", +  [FeatureFetchLimit16, FeatureWavefrontSize64, +   FeatureLocalMemorySize32768] +>; + +  //===----------------------------------------------------------------------===//  // Radeon HD 2000/3000 Series (R600).  //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp index 3ce646ff0d2..38933e7616a 100644 --- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -21,34 +21,37 @@  using namespace llvm; -R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() { +R600RegisterInfo::R600RegisterInfo() : R600GenRegisterInfo(0) {    RCW.RegWeight = 0;    RCW.WeightLimit = 0;  } +#define GET_REGINFO_TARGET_DESC +#include "R600GenRegisterInfo.inc" +  BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {    BitVector Reserved(getNumRegs());    const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();    const R600InstrInfo *TII = ST.getInstrInfo(); -  reserveRegisterTuples(Reserved, AMDGPU::ZERO); -  reserveRegisterTuples(Reserved, AMDGPU::HALF); -  reserveRegisterTuples(Reserved, AMDGPU::ONE); -  reserveRegisterTuples(Reserved, AMDGPU::ONE_INT); -  reserveRegisterTuples(Reserved, AMDGPU::NEG_HALF); -  reserveRegisterTuples(Reserved, AMDGPU::NEG_ONE); -  reserveRegisterTuples(Reserved, AMDGPU::PV_X); -  reserveRegisterTuples(Reserved, AMDGPU::ALU_LITERAL_X); -  reserveRegisterTuples(Reserved, AMDGPU::ALU_CONST); -  reserveRegisterTuples(Reserved, AMDGPU::PREDICATE_BIT); -  reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_OFF); -  reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_ZERO); -  reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_ONE); -  reserveRegisterTuples(Reserved, AMDGPU::INDIRECT_BASE_ADDR); - -  for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(), -                        E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) { +  reserveRegisterTuples(Reserved, R600::ZERO); +  reserveRegisterTuples(Reserved, R600::HALF); +  reserveRegisterTuples(Reserved, R600::ONE); +  reserveRegisterTuples(Reserved, R600::ONE_INT); +  reserveRegisterTuples(Reserved, R600::NEG_HALF); +  reserveRegisterTuples(Reserved, R600::NEG_ONE); +  reserveRegisterTuples(Reserved, R600::PV_X); +  reserveRegisterTuples(Reserved, R600::ALU_LITERAL_X); +  reserveRegisterTuples(Reserved, R600::ALU_CONST); +  reserveRegisterTuples(Reserved, R600::PREDICATE_BIT); +  reserveRegisterTuples(Reserved, R600::PRED_SEL_OFF); +  reserveRegisterTuples(Reserved, R600::PRED_SEL_ZERO); +  reserveRegisterTuples(Reserved, R600::PRED_SEL_ONE); +  reserveRegisterTuples(Reserved, 
R600::INDIRECT_BASE_ADDR); + +  for (TargetRegisterClass::iterator I = R600::R600_AddrRegClass.begin(), +                        E = R600::R600_AddrRegClass.end(); I != E; ++I) {      reserveRegisterTuples(Reserved, *I);    } @@ -58,7 +61,7 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {  }  // Dummy to not crash RegisterClassInfo. -static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister; +static const MCPhysReg CalleeSavedReg = R600::NoRegister;  const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs(    const MachineFunction *) const { @@ -66,7 +69,7 @@ const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs(  }  unsigned R600RegisterInfo::getFrameRegister(const MachineFunction &MF) const { -  return AMDGPU::NoRegister; +  return R600::NoRegister;  }  unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const { @@ -81,7 +84,7 @@ const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(                                                                     MVT VT) const {    switch(VT.SimpleTy) {    default: -  case MVT::i32: return &AMDGPU::R600_TReg32RegClass; +  case MVT::i32: return &R600::R600_TReg32RegClass;    }  } @@ -94,9 +97,9 @@ bool R600RegisterInfo::isPhysRegLiveAcrossClauses(unsigned Reg) const {    assert(!TargetRegisterInfo::isVirtualRegister(Reg));    switch (Reg) { -  case AMDGPU::OQAP: -  case AMDGPU::OQBP: -  case AMDGPU::AR_X: +  case R600::OQAP: +  case R600::OQBP: +  case R600::AR_X:      return false;    default:      return true; @@ -109,3 +112,10 @@ void R600RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,                                             RegScavenger *RS) const {    llvm_unreachable("Subroutines not supported yet");  } + +void R600RegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const { +  MCRegAliasIterator R(Reg, this, true); + +  for (; R.isValid(); ++R) +    Reserved.set(*R); +} diff --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.h b/llvm/lib/Target/AMDGPU/R600RegisterInfo.h index 305878522dd..5bc4800bafd 100644 --- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.h @@ -15,13 +15,14 @@  #ifndef LLVM_LIB_TARGET_AMDGPU_R600REGISTERINFO_H  #define LLVM_LIB_TARGET_AMDGPU_R600REGISTERINFO_H -#include "AMDGPURegisterInfo.h" +#define GET_REGINFO_HEADER +#include "R600GenRegisterInfo.inc"  namespace llvm {  class AMDGPUSubtarget; -struct R600RegisterInfo final : public AMDGPURegisterInfo { +struct R600RegisterInfo final : public R600GenRegisterInfo {    RegClassWeight RCW;    R600RegisterInfo(); @@ -49,6 +50,8 @@ struct R600RegisterInfo final : public AMDGPURegisterInfo {    void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,                             unsigned FIOperandNum,                             RegScavenger *RS = nullptr) const override; + +  void reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const;  };  } // End namespace llvm diff --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.td b/llvm/lib/Target/AMDGPU/R600RegisterInfo.td index 84ab328bdb2..02164b74a01 100644 --- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.td @@ -245,7 +245,7 @@ def R600_Reg128Vertical : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,    (add V0123_W, V0123_Z, V0123_Y, V0123_X)  >; -def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64, +def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32, i64, f64], 64,                                  (add (sequence "T%u_XY", 0, 
63))>;  def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64, diff --git a/llvm/lib/Target/AMDGPU/R700Instructions.td b/llvm/lib/Target/AMDGPU/R700Instructions.td index 613a0d729bb..988b70d16cd 100644 --- a/llvm/lib/Target/AMDGPU/R700Instructions.td +++ b/llvm/lib/Target/AMDGPU/R700Instructions.td @@ -13,7 +13,7 @@  //  //===----------------------------------------------------------------------===// -def isR700 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::R700">; +def isR700 : Predicate<"Subtarget->getGeneration() == R600Subtarget::R700">;  let Predicates = [isR700] in {    def SIN_r700 : SIN_Common<0x6E>; diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 1bd049f150c..c9bbf5ca7c0 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -76,7 +76,7 @@ public:    MachineRegisterInfo *MRI;    const SIInstrInfo *TII;    const SIRegisterInfo *TRI; -  const SISubtarget *ST; +  const AMDGPUSubtarget *ST;    void foldOperand(MachineOperand &OpToFold,                     MachineInstr *UseMI, @@ -972,7 +972,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {      return false;    MRI = &MF.getRegInfo(); -  ST = &MF.getSubtarget<SISubtarget>(); +  ST = &MF.getSubtarget<AMDGPUSubtarget>();    TII = ST->getInstrInfo();    TRI = &TII->getRegisterInfo(); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index cd60f079752..1d4724a61eb 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -113,7 +113,8 @@ static unsigned findFirstFreeSGPR(CCState &CCInfo) {  SITargetLowering::SITargetLowering(const TargetMachine &TM,                                     const SISubtarget &STI) -    : AMDGPUTargetLowering(TM, STI) { +    : AMDGPUTargetLowering(TM, STI), +      Subtarget(&STI) {    addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);    addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); @@ -147,7 +148,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,      addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);    } -  computeRegisterProperties(STI.getRegisterInfo()); +  computeRegisterProperties(Subtarget->getRegisterInfo());    // We need to custom lower vector stores from local memory    setOperationAction(ISD::LOAD, MVT::v2i32, Custom); @@ -323,7 +324,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand);    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand); -  if (getSubtarget()->hasFlatAddressSpace()) { +  if (Subtarget->hasFlatAddressSpace()) {      setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);      setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);    } @@ -336,6 +337,44 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,    setOperationAction(ISD::TRAP, MVT::Other, Custom);    setOperationAction(ISD::DEBUGTRAP, MVT::Other, Custom); +  if (Subtarget->has16BitInsts()) { +    setOperationAction(ISD::FLOG, MVT::f16, Custom); +    setOperationAction(ISD::FLOG10, MVT::f16, Custom); +  } + +  // v_mad_f32 does not support denormals according to some sources. +  if (!Subtarget->hasFP32Denormals()) +    setOperationAction(ISD::FMAD, MVT::f32, Legal); + +  if (!Subtarget->hasBFI()) { +    // fcopysign can be done in a single instruction with BFI. 
+    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); +    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); +  } + +  if (!Subtarget->hasBCNT(32)) +    setOperationAction(ISD::CTPOP, MVT::i32, Expand); + +  if (!Subtarget->hasBCNT(64)) +    setOperationAction(ISD::CTPOP, MVT::i64, Expand); + +  if (Subtarget->hasFFBH()) +    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); + +  if (Subtarget->hasFFBL()) +    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); + +  // We only really have 32-bit BFE instructions (and 16-bit on VI). +  // +  // On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any +  // effort to match them now. We want this to be false for i64 cases when the +  // extraction isn't restricted to the upper or lower half. Ideally we would +  // have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that +  // span the midpoint are probably relatively rare, so don't worry about them +  // for now. +  if (Subtarget->hasBFE()) +    setHasExtractBitsInsn(true); +    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); @@ -343,6 +382,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,      setOperationAction(ISD::FTRUNC, MVT::f64, Legal);      setOperationAction(ISD::FCEIL, MVT::f64, Legal);      setOperationAction(ISD::FRINT, MVT::f64, Legal); +  } else { +    setOperationAction(ISD::FCEIL, MVT::f64, Custom); +    setOperationAction(ISD::FTRUNC, MVT::f64, Custom); +    setOperationAction(ISD::FRINT, MVT::f64, Custom); +    setOperationAction(ISD::FFLOOR, MVT::f64, Custom);    }    setOperationAction(ISD::FFLOOR, MVT::f64, Legal); @@ -616,10 +660,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,    setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);    setSchedulingPreference(Sched::RegPressure); + +  // SI at least has hardware support for floating point exceptions, but no way +  // of using or handling them is implemented. They are also optional in OpenCL +  // (Section 7.3) +  setHasFloatingPointExceptions(Subtarget->hasFPExceptions());  }  const SISubtarget *SITargetLowering::getSubtarget() const { -  return static_cast<const SISubtarget *>(Subtarget); +  return Subtarget;  }  //===----------------------------------------------------------------------===// @@ -2012,8 +2061,7 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,    // FIXME: Does sret work properly?    if (!Info->isEntryFunction()) { -    const SIRegisterInfo *TRI -      = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo(); +    const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();      const MCPhysReg *I =        TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());      if (I) { @@ -2115,8 +2163,7 @@ void SITargetLowering::passSpecialInputs(    SelectionDAG &DAG = CLI.DAG;    const SDLoc &DL = CLI.DL; -  const SISubtarget *ST = getSubtarget(); -  const SIRegisterInfo *TRI = ST->getRegisterInfo(); +  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();    auto &ArgUsageInfo =      DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>(); @@ -2561,7 +2608,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,    // Add a register mask operand representing the call-preserved registers. 
-  const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); +  auto *TRI = static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);    assert(Mask && "Missing call preserved mask for calling convention");    Ops.push_back(DAG.getRegisterMask(Mask)); @@ -8179,8 +8226,7 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {    MachineRegisterInfo &MRI = MF.getRegInfo();    SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();    const MachineFrameInfo &MFI = MF.getFrameInfo(); -  const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); -  const SIRegisterInfo *TRI = ST.getRegisterInfo(); +  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();    if (Info->isEntryFunction()) {      // Callable functions have fixed registers used for stack access. diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index f18ce112dc8..f64694846e4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -22,6 +22,9 @@  namespace llvm {  class SITargetLowering final : public AMDGPUTargetLowering { +private: +  const SISubtarget *Subtarget; +    SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,                                     SDValue Chain, uint64_t Offset) const;    SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const; diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 658dfa38b7c..c404a6d291b 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -934,8 +934,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(    // All waits must be resolved at call return.    // NOTE: this could be improved with knowledge of all call sites or    //   with knowledge of the called routines. -  if (MI.getOpcode() == AMDGPU::RETURN || -      MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG || +  if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||        MI.getOpcode() == AMDGPU::S_SETPC_B64_return) {      for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;           T = (enum InstCounterType)(T + 1)) { @@ -1131,7 +1130,7 @@ void SIInsertWaitcnts::generateWaitcntInstBefore(    // TODO: Remove this work-around, enable the assert for Bug 457939    //       after fixing the scheduler. Also, the Shader Compiler code is    //       independent of target. 
-  if (readsVCCZ(MI) && ST->getGeneration() <= SISubtarget::SEA_ISLANDS) { +  if (readsVCCZ(MI) && ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {      if (ScoreBrackets->getScoreLB(LGKM_CNT) <              ScoreBrackets->getScoreUB(LGKM_CNT) &&          ScoreBrackets->hasPendingSMEM()) { @@ -1716,7 +1715,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,        if (ScoreBrackets->getScoreLB(LGKM_CNT) <                ScoreBrackets->getScoreUB(LGKM_CNT) &&            ScoreBrackets->hasPendingSMEM()) { -        if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) +        if (ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)            VCCZBugWorkAround = true;        }      } diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index cce41066976..29dc39f0baf 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -21,7 +21,7 @@ def isSI : Predicate<"Subtarget->getGeneration() "  class InstSI <dag outs, dag ins, string asm = "",                list<dag> pattern = []> : -  AMDGPUInst<outs, ins, asm, pattern>, PredicateControl { +  AMDGPUInst<outs, ins, asm, pattern>, GCNPredicateControl {    let SubtargetPredicate = isGCN;    // Low bits - basic encoding information. diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 92d719838b9..997b41e4b1a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -14,6 +14,7 @@  #include "SIInstrInfo.h"  #include "AMDGPU.h" +#include "AMDGPUIntrinsicInfo.h"  #include "AMDGPUSubtarget.h"  #include "GCNHazardRecognizer.h"  #include "SIDefines.h" @@ -63,6 +64,19 @@  using namespace llvm; +#define GET_INSTRINFO_CTOR_DTOR +#include "AMDGPUGenInstrInfo.inc" + +namespace llvm { +namespace AMDGPU { +#define GET_D16ImageDimIntrinsics_IMPL +#define GET_ImageDimIntrinsicTable_IMPL +#define GET_RsrcIntrinsics_IMPL +#include "AMDGPUGenSearchableTables.inc" +} +} + +  // Must be at least 4 to be able to branch over minimum unconditional branch  // code. This is only for making it possible to write reasonably small tests for  // long branches. @@ -71,7 +85,8 @@ BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),                   cl::desc("Restrict range of branch instructions (DEBUG)"));  SIInstrInfo::SIInstrInfo(const SISubtarget &ST) -  : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {} +  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), +    RI(ST), ST(ST) {}  //===----------------------------------------------------------------------===//  // TargetInstrInfo callbacks @@ -438,6 +453,28 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,    return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;  } +// FIXME: This behaves strangely. If, for example, you have 32 load + stores, +// the first 16 loads will be interleaved with the stores, and the next 16 will +// be clustered as expected. It should really split into 2 16 store batches. +// +// Loads are clustered until this returns false, rather than trying to schedule +// groups of stores. This also means we have to deal with saying different +// address space loads should be clustered, and ones which might cause bank +// conflicts. +// +// This might be deprecated so it might not be worth that much effort to fix. 
+bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, +                                          int64_t Offset0, int64_t Offset1, +                                          unsigned NumLoads) const { +  assert(Offset1 > Offset0 && +         "Second offset should be larger than first offset!"); +  // If we have less than 16 loads in a row, and the offsets are within 64 +  // bytes, then schedule together. + +  // A cacheline is 64 bytes (for global memory). +  return (NumLoads <= 16 && (Offset1 - Offset0) < 64); +} +  static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,                                MachineBasicBlock::iterator MI,                                const DebugLoc &DL, unsigned DestReg, @@ -998,7 +1035,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(      unsigned FrameOffset, unsigned Size) const {    MachineFunction *MF = MBB.getParent();    SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); -  const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); +  const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();    DebugLoc DL = MBB.findDebugLoc(MI);    unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();    unsigned WavefrontSize = ST.getWavefrontSize(); @@ -1134,7 +1171,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {    MachineBasicBlock &MBB = *MI.getParent();    DebugLoc DL = MBB.findDebugLoc(MI);    switch (MI.getOpcode()) { -  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); +  default: return TargetInstrInfo::expandPostRAPseudo(MI);    case AMDGPU::S_MOV_B64_term:      // This is only a terminator to get the correct spill code placement during      // register allocation. @@ -1900,16 +1937,16 @@ unsigned SIInstrInfo::getAddressSpaceForPseudoSourceKind(    switch(Kind) {    case PseudoSourceValue::Stack:    case PseudoSourceValue::FixedStack: -    return AMDGPUASI.PRIVATE_ADDRESS; +    return ST.getAMDGPUAS().PRIVATE_ADDRESS;    case PseudoSourceValue::ConstantPool:    case PseudoSourceValue::GOT:    case PseudoSourceValue::JumpTable:    case PseudoSourceValue::GlobalValueCallEntry:    case PseudoSourceValue::ExternalSymbolCallEntry:    case PseudoSourceValue::TargetCustom: -    return AMDGPUASI.CONSTANT_ADDRESS; +    return ST.getAMDGPUAS().CONSTANT_ADDRESS;    } -  return AMDGPUASI.FLAT_ADDRESS; +  return ST.getAMDGPUAS().FLAT_ADDRESS;  }  static void removeModOperands(MachineInstr &MI) { @@ -4649,7 +4686,7 @@ unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,      return AMDGPU::NoRegister;    assert(!MI.memoperands_empty() && -         (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS); +         (*MI.memoperands_begin())->getAddrSpace() == ST.getAMDGPUAS().PRIVATE_ADDRESS);    FrameIndex = Addr->getIndex();    return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); @@ -4768,7 +4805,7 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {      return true;    for (const MachineMemOperand *MMO : MI.memoperands()) { -    if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS) +    if (MMO->getAddrSpace() == ST.getAMDGPUAS().FLAT_ADDRESS)        return true;    }    return false; @@ -4948,3 +4985,55 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {    const auto RCID = MI.getDesc().OpInfo[Idx].RegClass;    return RCID == AMDGPU::SReg_128RegClassID;  } + +// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td +enum SIEncodingFamily { +  SI = 0, +  VI = 1, +  SDWA = 2, +  SDWA9 
= 3, +  GFX80 = 4, +  GFX9 = 5 +}; + +static SIEncodingFamily subtargetEncodingFamily(const SISubtarget &ST) { +  switch (ST.getGeneration()) { +  case SISubtarget::SOUTHERN_ISLANDS: +  case SISubtarget::SEA_ISLANDS: +    return SIEncodingFamily::SI; +  case SISubtarget::VOLCANIC_ISLANDS: +  case SISubtarget::GFX9: +    return SIEncodingFamily::VI; +  } +  llvm_unreachable("Unknown subtarget generation!"); +} + +int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { +  SIEncodingFamily Gen = subtargetEncodingFamily(ST); + +  if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 && +    ST.getGeneration() >= SISubtarget::GFX9) +    Gen = SIEncodingFamily::GFX9; + +  if (get(Opcode).TSFlags & SIInstrFlags::SDWA) +    Gen = ST.getGeneration() == SISubtarget::GFX9 ? SIEncodingFamily::SDWA9 +                                                      : SIEncodingFamily::SDWA; +  // Adjust the encoding family to GFX80 for D16 buffer instructions when the +  // subtarget has UnpackedD16VMem feature. +  // TODO: remove this when we discard GFX80 encoding. +  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf)) +    Gen = SIEncodingFamily::GFX80; + +  int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); + +  // -1 means that Opcode is already a native instruction. +  if (MCOp == -1) +    return Opcode; + +  // (uint16_t)-1 means that Opcode is a pseudo instruction that has +  // no encoding in the given subtarget generation. +  if (MCOp == (uint16_t)-1) +    return -1; + +  return MCOp; +} diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index a9038cfe6ae..a582f4cb2fc 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -31,6 +31,9 @@  #include <cassert>  #include <cstdint> +#define GET_INSTRINFO_HEADER +#include "AMDGPUGenInstrInfo.inc" +  namespace llvm {  class APInt; @@ -39,7 +42,7 @@ class RegScavenger;  class SISubtarget;  class TargetRegisterClass; -class SIInstrInfo final : public AMDGPUInstrInfo { +class SIInstrInfo final : public AMDGPUGenInstrInfo {  private:    const SIRegisterInfo RI;    const SISubtarget &ST; @@ -163,7 +166,10 @@ public:    bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,                             MachineInstr &SecondLdSt, unsigned BaseReg2, -                           unsigned NumLoads) const final; +                           unsigned NumLoads) const override; + +  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, +                               int64_t Offset1, unsigned NumLoads) const override;    void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,                     const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, @@ -871,6 +877,12 @@ public:    static bool isLegalMUBUFImmOffset(unsigned Imm) {      return isUInt<12>(Imm);    } + +  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction. +  /// Return -1 if the target-specific opcode for the pseudo instruction does +  /// not exist. If Opcode is not a pseudo instruction, this is identity. 
+  int pseudoToMCOpcode(int Opcode) const; +  };  namespace AMDGPU { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index a5fe25627f0..e8d89aaed4d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -17,6 +17,11 @@ def isVIOnly : Predicate<"Subtarget->getGeneration() =="  def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">; +class GCNPredicateControl : PredicateControl { +  Predicate SIAssemblerPredicate = isSICI; +  Predicate VIAssemblerPredicate = isVI; +} +  // Execpt for the NONE field, this must be kept in sync with the  // SIEncodingFamily enum in AMDGPUInstrInfo.cpp  def SIEncodingFamily { diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index cf0255db561..2f9cdec61ed 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -11,11 +11,10 @@  // that are not yet supported remain commented out.  //===----------------------------------------------------------------------===// -class GCNPat<dag pattern, dag result> : AMDGPUPat<pattern, result> { +class GCNPat<dag pattern, dag result> : Pat<pattern, result>, GCNPredicateControl {    let SubtargetPredicate = isGCN;  } -  include "VOPInstructions.td"  include "SOPInstructions.td"  include "SMInstructions.td" diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 2d31378511f..5ad3af79b72 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1232,8 +1232,6 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {      &AMDGPU::VReg_512RegClass,      &AMDGPU::SReg_512RegClass,      &AMDGPU::SCC_CLASSRegClass, -    &AMDGPU::R600_Reg32RegClass, -    &AMDGPU::R600_PredicateRegClass,      &AMDGPU::Pseudo_SReg_32RegClass,      &AMDGPU::Pseudo_SReg_128RegClass,    }; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 6ca6592aa0f..caf0f5d9308 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -21,6 +21,7 @@  namespace llvm { +class AMDGPUSubtarget;  class LiveIntervals;  class MachineRegisterInfo;  class SISubtarget; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 08b7a71cbcc..3fd3c75874a 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -181,7 +181,7 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {    if (Features.test(FeatureGFX9))      return {9, 0, 0}; -  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands)) +  if (Features.test(FeatureSouthernIslands))      return {0, 0, 0};    return {7, 0, 0};  } @@ -243,7 +243,7 @@ unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,  }  unsigned getMaxWavesPerCU(const FeatureBitset &Features) { -  return getMaxWavesPerEU(Features) * getEUsPerCU(Features); +  return getMaxWavesPerEU() * getEUsPerCU(Features);  }  unsigned getMaxWavesPerCU(const FeatureBitset &Features, @@ -255,9 +255,7 @@ unsigned getMinWavesPerEU(const FeatureBitset &Features) {    return 1;  } -unsigned getMaxWavesPerEU(const FeatureBitset &Features) { -  if (!Features.test(FeatureGCN)) -    return 8; +unsigned getMaxWavesPerEU() {    // FIXME: Need to take scratch memory into account.    
return 10;  } @@ -313,7 +311,7 @@ unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {  unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {    assert(WavesPerEU != 0); -  if (WavesPerEU >= getMaxWavesPerEU(Features)) +  if (WavesPerEU >= getMaxWavesPerEU())      return 0;    unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1); @@ -390,7 +388,7 @@ unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {  unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {    assert(WavesPerEU != 0); -  if (WavesPerEU >= getMaxWavesPerEU(Features)) +  if (WavesPerEU >= getMaxWavesPerEU())      return 0;    unsigned MinNumVGPRs =        alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1), @@ -735,6 +733,8 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {    case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;  unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { +  if (STI.getTargetTriple().getArch() == Triple::r600) +    return Reg;    MAP_REG2REG  } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 2ee19741acc..70681c27169 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -100,7 +100,7 @@ unsigned getMinWavesPerEU(const FeatureBitset &Features);  /// \returns Maximum number of waves per execution unit for given subtarget \p  /// Features without any kind of limitation. -unsigned getMaxWavesPerEU(const FeatureBitset &Features); +unsigned getMaxWavesPerEU();  /// \returns Maximum number of waves per execution unit for given subtarget \p  /// Features and limited by given \p FlatWorkGroupSize.  | 
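
The shouldScheduleLoadsNear() implementation added to SIInstrInfo above encodes a simple clustering rule: keep neighbouring loads together only while the run holds at most 16 loads and both offsets sit inside one 64-byte global-memory cacheline. A minimal standalone restatement of that predicate, with illustrative offsets that are not taken from the patch:

#include <cassert>
#include <cstdint>

// Same check as the patched hook: cluster while the run holds at most
// 16 loads and both offsets fall within one 64-byte cacheline.
static bool shouldCluster(int64_t Offset0, int64_t Offset1, unsigned NumLoads) {
  assert(Offset1 > Offset0 && "Second offset should be larger than first offset!");
  return NumLoads <= 16 && (Offset1 - Offset0) < 64;
}

int main() {
  bool SameLine  = shouldCluster(0, 48, 2);  // true: 48 bytes apart, one line
  bool CrossLine = shouldCluster(0, 72, 2);  // false: 72 >= 64, spans two lines
  bool LongRun   = shouldCluster(0, 4, 17);  // false: more than 16 loads
  return (SameLine && !CrossLine && !LongRun) ? 0 : 1;
}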

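pseudoToMCOpcode(), also added to SIInstrInfo above, returns the encoding-specific MC opcode for a pseudo instruction, the opcode itself when it is already a native instruction, and -1 when the pseudo has no encoding for the current subtarget generation. A minimal sketch of a caller that honours that contract, assuming the usual in-tree lowering context; the helper name and surrounding function are illustrative, not code from this patch:

#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/MC/MCInst.h"

// Sketch only: resolve a MachineInstr opcode through pseudoToMCOpcode()
// and report failure instead of emitting an unencodable instruction.
static bool lowerOpcode(const llvm::SIInstrInfo &TII,
                        const llvm::MachineInstr &MI, llvm::MCInst &OutMI) {
  int MCOp = TII.pseudoToMCOpcode(MI.getOpcode());
  if (MCOp == -1)
    return false;            // no encoding on this subtarget generation
  OutMI.setOpcode(MCOp);     // identity when the opcode is already native
  return true;
}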

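getMaxWavesPerEU() in the AMDGPUBaseInfo hunks drops its FeatureBitset parameter: with R600 no longer routed through these GCN occupancy queries, the 8-wave special case disappears and the helper can return 10 unconditionally, while getMaxWavesPerCU() keeps multiplying by getEUsPerCU(Features). A worked instance of that product, where the per-CU EU count is an assumed typical GCN value rather than something stated in the patch:

// Occupancy arithmetic implied by the patched helpers:
//   getMaxWavesPerCU(Features) = getMaxWavesPerEU() * getEUsPerCU(Features)
constexpr unsigned MaxWavesPerEU  = 10; // new parameterless result
constexpr unsigned AssumedEUsPerCU = 4; // assumed common GCN configuration
constexpr unsigned MaxWavesPerCU  = MaxWavesPerEU * AssumedEUsPerCU;
static_assert(MaxWavesPerCU == 40, "unconstrained upper bound per CU");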