diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 23 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/FLATInstructions.td | 18 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 41 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/MIMGInstructions.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIDefines.h | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrFormats.td | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 37 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir | 447 |
10 files changed, 559 insertions, 28 deletions
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index ef1ccd2c1aa..69bef02b203 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -691,34 +691,53 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind, multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName, RegisterClass vdataClass, ValueType vdataType, - SDPatternOperator atomic> { + SDPatternOperator atomic, + bit isFP = getIsFP<vdataType>.ret> { + let FPAtomic = isFP in def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>, MUBUFAddr64Table <0, NAME>; + + let FPAtomic = isFP in def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass>, MUBUFAddr64Table <1, NAME>; + + let FPAtomic = isFP in def _OFFEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>; + + let FPAtomic = isFP in + def _IDXEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>; + + let FPAtomic = isFP in def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>; } multiclass MUBUF_Pseudo_Atomics_RTN <string opName, RegisterClass vdataClass, ValueType vdataType, - SDPatternOperator atomic> { + SDPatternOperator atomic, + bit isFP = getIsFP<vdataType>.ret> { + let FPAtomic = isFP in def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass, [(set vdataType:$vdata, (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc), vdataType:$vdata_in))]>, MUBUFAddr64Table <0, NAME # "_RTN">; + let FPAtomic = isFP in def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, [(set vdataType:$vdata, (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc), vdataType:$vdata_in))]>, MUBUFAddr64Table <1, NAME # "_RTN">; + let FPAtomic = isFP in def _OFFEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>; + + let FPAtomic = isFP in def _IDXEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>; + + let FPAtomic = isFP in def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>; } diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 0196b36a95c..966bb6666cc 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -273,7 +273,8 @@ multiclass FLAT_Atomic_Pseudo< ValueType vt, SDPatternOperator atomic = null_frag, ValueType data_vt = vt, - RegisterClass data_rc = vdst_rc> { + RegisterClass data_rc = vdst_rc, + bit isFP = getIsFP<data_vt>.ret> { def "" : FLAT_AtomicNoRet_Pseudo <opName, (outs), (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc), @@ -281,6 +282,7 @@ multiclass FLAT_Atomic_Pseudo< GlobalSaddrTable<0, opName>, AtomicNoRet <opName, 0> { let PseudoInstr = NAME; + let FPAtomic = isFP; } def _RTN : FLAT_AtomicRet_Pseudo <opName, @@ -290,7 +292,9 @@ multiclass FLAT_Atomic_Pseudo< [(set vt:$vdst, (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>, GlobalSaddrTable<0, opName#"_rtn">, - AtomicNoRet <opName, 1>; + AtomicNoRet <opName, 1>{ + let FPAtomic = isFP; + } } multiclass FLAT_Global_Atomic_Pseudo_NO_RTN< @@ -299,7 +303,8 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN< ValueType vt, SDPatternOperator atomic = null_frag, ValueType data_vt = vt, - RegisterClass data_rc = vdst_rc> { + RegisterClass data_rc = vdst_rc, + bit isFP = getIsFP<data_vt>.ret> { def "" : FLAT_AtomicNoRet_Pseudo <opName, (outs), @@ -309,6 +314,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN< AtomicNoRet <opName, 0> { let has_saddr = 1; let PseudoInstr = NAME; + let FPAtomic = isFP; } def _SADDR : FLAT_AtomicNoRet_Pseudo <opName, @@ -320,6 +326,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN< let has_saddr = 1; let enabled_saddr = 1; let PseudoInstr = NAME#"_SADDR"; + let FPAtomic = isFP; } } @@ -329,7 +336,8 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN< ValueType vt, SDPatternOperator atomic = null_frag, ValueType data_vt = vt, - RegisterClass data_rc = vdst_rc> { + RegisterClass data_rc = vdst_rc, + bit isFP = getIsFP<data_vt>.ret> { def _RTN : FLAT_AtomicRet_Pseudo <opName, (outs vdst_rc:$vdst), @@ -340,6 +348,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN< GlobalSaddrTable<0, opName#"_rtn">, AtomicNoRet <opName, 1> { let has_saddr = 1; + let FPAtomic = isFP; } def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName, @@ -351,6 +360,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN< let has_saddr = 1; let enabled_saddr = 1; let PseudoInstr = NAME#"_SADDR_RTN"; + let FPAtomic = isFP; } } diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 1d5ff3c4e7b..16436be984f 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -145,6 +145,9 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0) return NoopHazard; + if (checkFPAtomicToDenormModeHazard(MI) > 0) + return NoopHazard; + if (ST.hasNoDataDepHazard()) return NoHazard; @@ -247,6 +250,8 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) { if (ST.hasNSAtoVMEMBug()) WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI)); + WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI)); + if (ST.hasNoDataDepHazard()) return WaitStates; @@ -1138,3 +1143,39 @@ int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) { return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1); } + +int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) { + int FPAtomicToDenormModeWaitStates = 3; + + if (MI->getOpcode() != AMDGPU::S_DENORM_MODE) + return 0; + + auto IsHazardFn = [] (MachineInstr *I) { + if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I)) + return false; + return SIInstrInfo::isFPAtomic(*I); + }; + + auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) { + if (WaitStates >= 3 || SIInstrInfo::isVALU(*MI)) + return true; + + switch (MI->getOpcode()) { + case AMDGPU::S_WAITCNT: + case AMDGPU::S_WAITCNT_VSCNT: + case AMDGPU::S_WAITCNT_VMCNT: + case AMDGPU::S_WAITCNT_EXPCNT: + case AMDGPU::S_WAITCNT_LGKMCNT: + case AMDGPU::S_WAITCNT_IDLE: + return true; + default: + break; + } + + return false; + }; + + + return FPAtomicToDenormModeWaitStates - + ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn); +} diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index 0c4c9d9d982..cf914b39804 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -84,6 +84,7 @@ private: int checkAnyInstHazards(MachineInstr *MI); int checkReadM0Hazards(MachineInstr *SMovRel); int checkNSAtoVMEMHazard(MachineInstr *MI); + int checkFPAtomicToDenormModeHazard(MachineInstr *MI); void fixHazards(MachineInstr *MI); bool fixVcmpxPermlaneHazards(MachineInstr *MI); bool fixVMEMtoScalarWriteHazards(MachineInstr *MI); diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 6f7dbc76f2e..bb0c9306f53 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -716,9 +716,11 @@ defm IMAGE_ATOMIC_OR : MIMG_Atomic <mimg<0x19>, "image_atomic_or">; defm IMAGE_ATOMIC_XOR : MIMG_Atomic <mimg<0x1a>, "image_atomic_xor">; defm IMAGE_ATOMIC_INC : MIMG_Atomic <mimg<0x1b>, "image_atomic_inc">; defm IMAGE_ATOMIC_DEC : MIMG_Atomic <mimg<0x1c>, "image_atomic_dec">; +//let FPAtomic = 1 in { //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d, 1>; -- not on VI //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>; -- not on VI //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>; -- not on VI +//} // End let FPAtomic = 1 defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, AMDGPUSample>; defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, AMDGPUSample_cl>; defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, AMDGPUSample_d>; diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index cc96f1de43a..50cd079721c 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -93,7 +93,10 @@ enum : uint64_t { IsNonFlatSeg = UINT64_C(1) << 51, // Uses floating point double precision rounding mode - FPDPRounding = UINT64_C(1) << 52 + FPDPRounding = UINT64_C(1) << 52, + + // Instruction is FP atomic. + FPAtomic = UINT64_C(1) << 53 }; // v_cmp_class_* etc. use a 10-bit mask for what operation is checked. diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index e0f928bdf86..eb64a0685de 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -118,6 +118,9 @@ class InstSI <dag outs, dag ins, string asm = "", // rounding mode flags field bit FPDPRounding = 0; + // Instruction is FP atomic. + field bit FPAtomic = 0; + // These need to be kept in sync with the enum in SIInstrFlags. let TSFlags{0} = SALU; let TSFlags{1} = VALU; @@ -179,6 +182,8 @@ class InstSI <dag outs, dag ins, string asm = "", let TSFlags{52} = FPDPRounding; + let TSFlags{53} = FPAtomic; + let SchedRW = [Write32Bit]; field bits<1> DisableSIDecoder = 0; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index f96b03dcd2c..64eb60b4690 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -631,6 +631,14 @@ public: return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding; } + static bool isFPAtomic(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic; + } + + bool isFPAtomic(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::FPAtomic; + } + bool isVGPRCopy(const MachineInstr &MI) const { assert(MI.isCopy()); unsigned Dest = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index c06356a685e..1a3e16afce3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1243,6 +1243,17 @@ class getVALUDstForVT<ValueType VT> { VOPDstS64orS32)))); // else VT == i1 } +// Returns true if VT is floating point. +class getIsFP<ValueType VT> { + bit ret = !if(!eq(VT.Value, f16.Value), 1, + !if(!eq(VT.Value, v2f16.Value), 1, + !if(!eq(VT.Value, f32.Value), 1, + !if(!eq(VT.Value, v2f32.Value), 1, + !if(!eq(VT.Value, f64.Value), 1, + !if(!eq(VT.Value, v2f64.Value), 1, + 0)))))); +} + // Returns the register class to use for the destination of VOP[12C] // instructions with SDWA extension class getSDWADstForVT<ValueType VT> { @@ -1254,11 +1265,7 @@ class getSDWADstForVT<ValueType VT> { // Returns the register class to use for source 0 of VOP[12C] // instructions for the given VT. class getVOPSrc0ForVT<ValueType VT> { - bit isFP = !if(!eq(VT.Value, f16.Value), 1, - !if(!eq(VT.Value, v2f16.Value), 1, - !if(!eq(VT.Value, f32.Value), 1, - !if(!eq(VT.Value, f64.Value), 1, - 0)))); + bit isFP = getIsFP<VT>.ret; RegisterOperand ret = !if(isFP, @@ -1292,9 +1299,7 @@ class getVregSrcForVT<ValueType VT> { } class getSDWASrcForVT <ValueType VT> { - bit isFP = !if(!eq(VT.Value, f16.Value), 1, - !if(!eq(VT.Value, f32.Value), 1, - 0)); + bit isFP = getIsFP<VT>.ret; RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32); RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32); RegisterOperand ret = !if(isFP, retFlt, retInt); @@ -1303,11 +1308,7 @@ class getSDWASrcForVT <ValueType VT> { // Returns the register class to use for sources of VOP3 instructions for the // given VT. class getVOP3SrcForVT<ValueType VT> { - bit isFP = !if(!eq(VT.Value, f16.Value), 1, - !if(!eq(VT.Value, v2f16.Value), 1, - !if(!eq(VT.Value, f32.Value), 1, - !if(!eq(VT.Value, f64.Value), 1, - 0)))); + bit isFP = getIsFP<VT>.ret; RegisterOperand ret = !if(!eq(VT.Size, 128), VSrc_128, @@ -1351,10 +1352,7 @@ class isModifierType<ValueType SrcVT> { // Return type of input modifiers operand for specified input operand class getSrcMod <ValueType VT, bit EnableF32SrcMods> { - bit isFP = !if(!eq(VT.Value, f16.Value), 1, - !if(!eq(VT.Value, f32.Value), 1, - !if(!eq(VT.Value, f64.Value), 1, - 0))); + bit isFP = getIsFP<VT>.ret; bit isPacked = isPackedType<VT>.ret; Operand ret = !if(!eq(VT.Size, 64), !if(isFP, FP64InputMods, Int64InputMods), @@ -1373,10 +1371,7 @@ class getOpSelMod <ValueType VT> { // Return type of input modifiers operand specified input operand for DPP class getSrcModExt <ValueType VT> { - bit isFP = !if(!eq(VT.Value, f16.Value), 1, - !if(!eq(VT.Value, f32.Value), 1, - !if(!eq(VT.Value, f64.Value), 1, - 0))); + bit isFP = getIsFP<VT>.ret; Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir new file mode 100644 index 00000000000..f1b5ee3524d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir @@ -0,0 +1,447 @@ +# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: name: flat_atomic_fcmpswap_to_s_denorm_mode +# GCN: FLAT_ATOMIC_FCMPSWAP +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_atomic_fcmpswap_to_s_denorm_mode +body: | + bb.0: + FLAT_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: flat_atomic_fcmpswap_x2_to_s_denorm_mode +# GCN: FLAT_ATOMIC_FCMPSWAP_X2 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_atomic_fcmpswap_x2_to_s_denorm_mode +body: | + bb.0: + FLAT_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: flat_atomic_fmax_to_s_denorm_mode +# GCN: FLAT_ATOMIC_FMAX +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_atomic_fmax_to_s_denorm_mode +body: | + bb.0: + FLAT_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: flat_atomic_fmax_x2_to_s_denorm_mode +# GCN: FLAT_ATOMIC_FMAX_X2 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_atomic_fmax_x2_to_s_denorm_mode +body: | + bb.0: + FLAT_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: flat_atomic_fmin_to_s_denorm_mode +# GCN: FLAT_ATOMIC_FMIN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_atomic_fmin_to_s_denorm_mode +body: | + bb.0: + FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: flat_atomic_fmin_x2_to_s_denorm_mode +# GCN: FLAT_ATOMIC_FMIN_X2 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_atomic_fmin_x2_to_s_denorm_mode +body: | + bb.0: + FLAT_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode +# GCN: FLAT_ATOMIC_FCMPSWAP_X2_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vreg_64 = FLAT_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: flat_atomic_fmax_rtn_to_s_denorm_mode +# GCN: FLAT_ATOMIC_FMAX_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_atomic_fmax_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vgpr_32 = FLAT_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode +# GCN: FLAT_ATOMIC_FMAX_X2_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vreg_64 = FLAT_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: flat_atomic_fmin_rtn_to_s_denorm_mode +# GCN: FLAT_ATOMIC_FMIN_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_atomic_fmin_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vgpr_32 = FLAT_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode +# GCN: FLAT_ATOMIC_FMIN_X2_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vreg_64 = FLAT_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode +# GCN: FLAT_ATOMIC_FCMPSWAP_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vgpr_32 = FLAT_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fcmpswap_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FCMPSWAP +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fcmpswap_to_s_denorm_mode +body: | + bb.0: + GLOBAL_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fcmpswap_x2_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fcmpswap_x2_to_s_denorm_mode +body: | + bb.0: + GLOBAL_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fmax_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FMAX +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fmax_to_s_denorm_mode +body: | + bb.0: + GLOBAL_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fmax_x2_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FMAX_X2 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fmax_x2_to_s_denorm_mode +body: | + bb.0: + GLOBAL_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fmin_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FMIN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fmin_to_s_denorm_mode +body: | + bb.0: + GLOBAL_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fmin_x2_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FMIN_X2 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fmin_x2_to_s_denorm_mode +body: | + bb.0: + GLOBAL_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fcmpswap_rtn_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FCMPSWAP_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fcmpswap_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vgpr_32 = GLOBAL_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fmax_rtn_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FMAX_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fmax_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fmax_x2_rtn_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FMAX_X2_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fmax_x2_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fmin_rtn_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FMIN_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fmin_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fmin_x2_rtn_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FMIN_X2_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fmin_x2_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fcmpswap_saddr_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FCMPSWAP_SADDR +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fcmpswap_saddr_to_s_denorm_mode +body: | + bb.0: + GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FMAX_SADDR_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FMIN_SADDR_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode +# GCN: GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_NOP 0 +# GCN-NEXT: S_DENORM_MODE +--- +name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode +body: | + bb.0: + %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: flat_fp_atomic_to_s_denorm_mode_waitcnt +# GCN: FLAT_ATOMIC_FMIN +# GCN-NEXT: S_WAITCNT +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_fp_atomic_to_s_denorm_mode_waitcnt +body: | + bb.0: + FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + S_WAITCNT 0 + S_DENORM_MODE 0 +... + +# GCN-LABEL: name: flat_fp_atomic_to_s_denorm_mode_valu +# GCN: FLAT_ATOMIC_FMIN +# GCN-NEXT: V_ADD_F32_e32 +# GCN-NEXT: S_DENORM_MODE +--- +name: flat_fp_atomic_to_s_denorm_mode_valu +body: | + bb.0: + FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) + %2:vgpr_32 = V_ADD_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, implicit $exec + S_DENORM_MODE 0 +... |

