diff options
| author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-21 16:30:14 +0000 | 
|---|---|---|
| committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-21 16:30:14 +0000 | 
| commit | bdf7f81b89d194dc6e629c769df1240b927ee392 (patch) | |
| tree | 51be2ca884ee461962e33cb95ac40bb7e04ac2e8 /llvm/lib | |
| parent | dbcdad51ff8263240ab236404a2c23bb2d6c3b2e (diff) | |
| download | bcm5719-llvm-bdf7f81b89d194dc6e629c769df1240b927ee392.tar.gz bcm5719-llvm-bdf7f81b89d194dc6e629c769df1240b927ee392.zip  | |
[AMDGPU] hazard recognizer for fp atomic to s_denorm_mode
An s_denorm_mode issued after an FP atomic requires 3 wait states unless
there is an s_waitcnt or a VALU instruction in between.
Differential Revision: https://reviews.llvm.org/D63619
llvm-svn: 364074
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 23 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/FLATInstructions.td | 18 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 41 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/MIMGInstructions.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIDefines.h | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrFormats.td | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 37 | 
9 files changed, 112 insertions, 28 deletions
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index ef1ccd2c1aa..69bef02b203 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -691,34 +691,53 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,  multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,                                          RegisterClass vdataClass,                                          ValueType vdataType, -                                        SDPatternOperator atomic> { +                                        SDPatternOperator atomic, +                                        bit isFP = getIsFP<vdataType>.ret> { +  let FPAtomic = isFP in    def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,                  MUBUFAddr64Table <0, NAME>; + +  let FPAtomic = isFP in    def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass>,                  MUBUFAddr64Table <1, NAME>; + +  let FPAtomic = isFP in    def _OFFEN  : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn,  vdataClass>; + +  let FPAtomic = isFP in +    def _IDXEN  : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn,  vdataClass>; + +  let FPAtomic = isFP in    def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;  }  multiclass MUBUF_Pseudo_Atomics_RTN <string opName,                                       RegisterClass vdataClass,                                       ValueType vdataType, -                                     SDPatternOperator atomic> { +                                     SDPatternOperator atomic, +                                     bit isFP = getIsFP<vdataType>.ret> { +  let FPAtomic = isFP in    def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,      [(set vdataType:$vdata,       (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc),               
vdataType:$vdata_in))]>,      MUBUFAddr64Table <0, NAME # "_RTN">; +  let FPAtomic = isFP in    def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,      [(set vdataType:$vdata,       (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc),               vdataType:$vdata_in))]>,      MUBUFAddr64Table <1, NAME # "_RTN">; +  let FPAtomic = isFP in    def _OFFEN_RTN  : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.OffEn,  vdataClass>; + +  let FPAtomic = isFP in    def _IDXEN_RTN  : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.IdxEn,  vdataClass>; + +  let FPAtomic = isFP in    def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;  } diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 0196b36a95c..966bb6666cc 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -273,7 +273,8 @@ multiclass FLAT_Atomic_Pseudo<    ValueType vt,    SDPatternOperator atomic = null_frag,    ValueType data_vt = vt, -  RegisterClass data_rc = vdst_rc> { +  RegisterClass data_rc = vdst_rc, +  bit isFP = getIsFP<data_vt>.ret> {    def "" : FLAT_AtomicNoRet_Pseudo <opName,      (outs),      (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc), @@ -281,6 +282,7 @@ multiclass FLAT_Atomic_Pseudo<      GlobalSaddrTable<0, opName>,      AtomicNoRet <opName, 0> {      let PseudoInstr = NAME; +    let FPAtomic = isFP;    }    def _RTN : FLAT_AtomicRet_Pseudo <opName, @@ -290,7 +292,9 @@ multiclass FLAT_Atomic_Pseudo<      [(set vt:$vdst,        (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,         GlobalSaddrTable<0, opName#"_rtn">, -       AtomicNoRet <opName, 1>; +       AtomicNoRet <opName, 1>{ +    let FPAtomic = isFP; +  }  }  multiclass FLAT_Global_Atomic_Pseudo_NO_RTN< @@ -299,7 +303,8 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<    ValueType vt,    
SDPatternOperator atomic = null_frag,    ValueType data_vt = vt, -  RegisterClass data_rc = vdst_rc> { +  RegisterClass data_rc = vdst_rc, +  bit isFP = getIsFP<data_vt>.ret> {    def "" : FLAT_AtomicNoRet_Pseudo <opName,      (outs), @@ -309,6 +314,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<      AtomicNoRet <opName, 0> {      let has_saddr = 1;      let PseudoInstr = NAME; +    let FPAtomic = isFP;    }    def _SADDR : FLAT_AtomicNoRet_Pseudo <opName, @@ -320,6 +326,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<      let has_saddr = 1;      let enabled_saddr = 1;      let PseudoInstr = NAME#"_SADDR"; +    let FPAtomic = isFP;    }  } @@ -329,7 +336,8 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<    ValueType vt,    SDPatternOperator atomic = null_frag,    ValueType data_vt = vt, -  RegisterClass data_rc = vdst_rc> { +  RegisterClass data_rc = vdst_rc, +  bit isFP = getIsFP<data_vt>.ret> {    def _RTN : FLAT_AtomicRet_Pseudo <opName,      (outs vdst_rc:$vdst), @@ -340,6 +348,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<        GlobalSaddrTable<0, opName#"_rtn">,        AtomicNoRet <opName, 1> {      let has_saddr = 1; +    let FPAtomic = isFP;    }    def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName, @@ -351,6 +360,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<       let has_saddr = 1;       let enabled_saddr = 1;       let PseudoInstr = NAME#"_SADDR_RTN"; +     let FPAtomic = isFP;    }  } diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 1d5ff3c4e7b..16436be984f 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -145,6 +145,9 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {    if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)      return NoopHazard; +  if (checkFPAtomicToDenormModeHazard(MI) > 0) +    return NoopHazard; +    if (ST.hasNoDataDepHazard())      return NoHazard; @@ -247,6 +250,8 @@ unsigned 
GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {    if (ST.hasNSAtoVMEMBug())      WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI)); +  WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI)); +    if (ST.hasNoDataDepHazard())      return WaitStates; @@ -1138,3 +1143,39 @@ int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {    return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);  } + +int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) { +  int FPAtomicToDenormModeWaitStates = 3; + +  if (MI->getOpcode() != AMDGPU::S_DENORM_MODE) +    return 0; + +  auto IsHazardFn = [] (MachineInstr *I) { +    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I)) +      return false; +    return SIInstrInfo::isFPAtomic(*I); +  }; + +  auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) { +    if (WaitStates >= 3 || SIInstrInfo::isVALU(*MI)) +      return true; + +    switch (MI->getOpcode()) { +    case AMDGPU::S_WAITCNT: +    case AMDGPU::S_WAITCNT_VSCNT: +    case AMDGPU::S_WAITCNT_VMCNT: +    case AMDGPU::S_WAITCNT_EXPCNT: +    case AMDGPU::S_WAITCNT_LGKMCNT: +    case AMDGPU::S_WAITCNT_IDLE: +      return true; +    default: +      break; +    } + +    return false; +  }; + + +  return FPAtomicToDenormModeWaitStates - +         ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn); +} diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index 0c4c9d9d982..cf914b39804 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -84,6 +84,7 @@ private:    int checkAnyInstHazards(MachineInstr *MI);    int checkReadM0Hazards(MachineInstr *SMovRel);    int checkNSAtoVMEMHazard(MachineInstr *MI); +  int checkFPAtomicToDenormModeHazard(MachineInstr *MI);    void fixHazards(MachineInstr *MI);    bool fixVcmpxPermlaneHazards(MachineInstr *MI);    bool fixVMEMtoScalarWriteHazards(MachineInstr 
*MI); diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 6f7dbc76f2e..bb0c9306f53 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -716,9 +716,11 @@ defm IMAGE_ATOMIC_OR : MIMG_Atomic <mimg<0x19>, "image_atomic_or">;  defm IMAGE_ATOMIC_XOR : MIMG_Atomic <mimg<0x1a>, "image_atomic_xor">;  defm IMAGE_ATOMIC_INC : MIMG_Atomic <mimg<0x1b>, "image_atomic_inc">;  defm IMAGE_ATOMIC_DEC : MIMG_Atomic <mimg<0x1c>, "image_atomic_dec">; +//let FPAtomic = 1 in {  //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d, 1>; -- not on VI  //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>; -- not on VI  //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>; -- not on VI +//} // End let FPAtomic = 1  defm IMAGE_SAMPLE           : MIMG_Sampler_WQM <0x00000020, AMDGPUSample>;  defm IMAGE_SAMPLE_CL        : MIMG_Sampler_WQM <0x00000021, AMDGPUSample_cl>;  defm IMAGE_SAMPLE_D         : MIMG_Sampler <0x00000022, AMDGPUSample_d>; diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index cc96f1de43a..50cd079721c 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -93,7 +93,10 @@ enum : uint64_t {    IsNonFlatSeg = UINT64_C(1) << 51,    // Uses floating point double precision rounding mode -  FPDPRounding = UINT64_C(1) << 52 +  FPDPRounding = UINT64_C(1) << 52, + +  // Instruction is FP atomic. +  FPAtomic = UINT64_C(1) << 53  };  // v_cmp_class_* etc. use a 10-bit mask for what operation is checked. 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index e0f928bdf86..eb64a0685de 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -118,6 +118,9 @@ class InstSI <dag outs, dag ins, string asm = "",    // rounding mode flags    field bit FPDPRounding = 0; +  // Instruction is FP atomic. +  field bit FPAtomic = 0; +    // These need to be kept in sync with the enum in SIInstrFlags.    let TSFlags{0} = SALU;    let TSFlags{1} = VALU; @@ -179,6 +182,8 @@ class InstSI <dag outs, dag ins, string asm = "",    let TSFlags{52} = FPDPRounding; +  let TSFlags{53} = FPAtomic; +    let SchedRW = [Write32Bit];    field bits<1> DisableSIDecoder = 0; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index f96b03dcd2c..64eb60b4690 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -631,6 +631,14 @@ public:      return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;    } +  static bool isFPAtomic(const MachineInstr &MI) { +    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic; +  } + +  bool isFPAtomic(uint16_t Opcode) const { +    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic; +  } +    bool isVGPRCopy(const MachineInstr &MI) const {      assert(MI.isCopy());      unsigned Dest = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index c06356a685e..1a3e16afce3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1243,6 +1243,17 @@ class getVALUDstForVT<ValueType VT> {                                VOPDstS64orS32)))); // else VT == i1  } +// Returns true if VT is floating point. 
+class getIsFP<ValueType VT> { +  bit ret = !if(!eq(VT.Value, f16.Value), 1, +            !if(!eq(VT.Value, v2f16.Value), 1, +            !if(!eq(VT.Value, f32.Value), 1, +            !if(!eq(VT.Value, v2f32.Value), 1, +            !if(!eq(VT.Value, f64.Value), 1, +            !if(!eq(VT.Value, v2f64.Value), 1, +            0)))))); +} +  // Returns the register class to use for the destination of VOP[12C]  // instructions with SDWA extension  class getSDWADstForVT<ValueType VT> { @@ -1254,11 +1265,7 @@ class getSDWADstForVT<ValueType VT> {  // Returns the register class to use for source 0 of VOP[12C]  // instructions for the given VT.  class getVOPSrc0ForVT<ValueType VT> { -  bit isFP = !if(!eq(VT.Value, f16.Value), 1, -             !if(!eq(VT.Value, v2f16.Value), 1, -             !if(!eq(VT.Value, f32.Value), 1, -             !if(!eq(VT.Value, f64.Value), 1, -             0)))); +  bit isFP = getIsFP<VT>.ret;    RegisterOperand ret =      !if(isFP, @@ -1292,9 +1299,7 @@ class getVregSrcForVT<ValueType VT> {  }  class getSDWASrcForVT <ValueType VT> { -  bit isFP = !if(!eq(VT.Value, f16.Value), 1, -             !if(!eq(VT.Value, f32.Value), 1, -             0)); +  bit isFP = getIsFP<VT>.ret;    RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);    RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);    RegisterOperand ret = !if(isFP, retFlt, retInt); @@ -1303,11 +1308,7 @@ class getSDWASrcForVT <ValueType VT> {  // Returns the register class to use for sources of VOP3 instructions for the  // given VT.  
class getVOP3SrcForVT<ValueType VT> { -  bit isFP = !if(!eq(VT.Value, f16.Value), 1, -             !if(!eq(VT.Value, v2f16.Value), 1, -             !if(!eq(VT.Value, f32.Value), 1, -             !if(!eq(VT.Value, f64.Value), 1, -             0)))); +  bit isFP = getIsFP<VT>.ret;    RegisterOperand ret =    !if(!eq(VT.Size, 128),       VSrc_128, @@ -1351,10 +1352,7 @@ class isModifierType<ValueType SrcVT> {  // Return type of input modifiers operand for specified input operand  class getSrcMod <ValueType VT, bit EnableF32SrcMods> { -  bit isFP = !if(!eq(VT.Value, f16.Value), 1, -               !if(!eq(VT.Value, f32.Value), 1, -               !if(!eq(VT.Value, f64.Value), 1, -               0))); +  bit isFP = getIsFP<VT>.ret;    bit isPacked = isPackedType<VT>.ret;    Operand ret =  !if(!eq(VT.Size, 64),                       !if(isFP, FP64InputMods, Int64InputMods), @@ -1373,10 +1371,7 @@ class getOpSelMod <ValueType VT> {  // Return type of input modifiers operand specified input operand for DPP  class getSrcModExt <ValueType VT> { -    bit isFP = !if(!eq(VT.Value, f16.Value), 1, -               !if(!eq(VT.Value, f32.Value), 1, -               !if(!eq(VT.Value, f64.Value), 1, -               0))); +  bit isFP = getIsFP<VT>.ret;    Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);  }  | 

