diff options
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPU.td | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIDefines.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrFormats.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SMInstructions.td | 125 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir | 14 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/smem.s | 24 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/smrd-err.s | 2 |
11 files changed, 183 insertions, 35 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index ace0ac24e8f..90796f22b19 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -171,6 +171,12 @@ def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode", "Has VGPR mode register indexing" >; +def FeatureScalarStores : SubtargetFeature<"scalar-stores", + "HasScalarStores", + "true", + "Has store scalar memory instructions" +>; + //===------------------------------------------------------------===// // Subtarget Features (options and debugging) //===------------------------------------------------------------===// @@ -311,7 +317,8 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", [FeatureFP64, FeatureLocalMemorySize65536, FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, - FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel + FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel, + FeatureScalarStores ] >; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index c6844d36479..416772199da 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -105,6 +105,7 @@ protected: bool Has16BitInsts; bool HasMovrel; bool HasVGPRIndexMode; + bool HasScalarStores; bool FlatAddressSpace; bool R600ALUInst; bool CaymanISA; @@ -527,6 +528,10 @@ public: return getGeneration() >= VOLCANIC_ISLANDS; } + bool hasScalarStores() const { + return HasScalarStores; + } + bool enableSIScheduler() const { return EnableSIScheduler; } diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 5f7e9811ffe..f48011fe5e1 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -44,7 +44,8 @@ enum { VOPAsmPrefer32Bit = 1 << 25, Gather4 = 1 << 26, DisableWQM = 1 << 27, - SOPK_ZEXT = 1 << 28 + SOPK_ZEXT = 1 << 28, + SCALAR_STORE = 1 << 29 }; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index 91e70650162..8976333412b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -60,6 +60,11 @@ class InstSI <dag outs, dag ins, string asm = "", // use it as unsigned. field bits<1> SOPKZext = 0; + // This is an s_store_dword* instruction that requires a cache flush + // on wave termination. It is necessary to distinguish from mayStore + // SMEM instructions like the cache flush ones. + field bits<1> ScalarStore = 0; + // These need to be kept in sync with the enum in SIInstrFlags. let TSFlags{0} = VM_CNT; let TSFlags{1} = EXP_CNT; @@ -94,6 +99,7 @@ class InstSI <dag outs, dag ins, string asm = "", let TSFlags{26} = Gather4; let TSFlags{27} = DisableWQM; let TSFlags{28} = SOPKZext; + let TSFlags{29} = ScalarStore; let SchedRW = [Write32Bit]; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0acbed12766..72de6497397 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2116,6 +2116,18 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } } + if (isSMRD(MI)) { + if (MI.mayStore()) { + // The register offset form of scalar stores may only use m0 as the + // soffset register. + const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff); + if (Soff && Soff->getReg() != AMDGPU::M0) { + ErrInfo = "scalar stores must use m0 as offset register"; + return false; + } + } + } + return true; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 552a36e3585..e5d237b6279 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -428,6 +428,16 @@ public: return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT; } + /// \returns true if this is an s_store_dword* instruction. This is more + /// specific than than isSMEM && mayStore. + static bool isScalarStore(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE; + } + + bool isScalarStore(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE; + } + bool isVGPRCopy(const MachineInstr &MI) const { assert(MI.isCopy()); unsigned Dest = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index a46cbf3e624..9b9d5588ed5 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -37,6 +37,7 @@ class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt bits<1> has_sbase = 1; bits<1> has_sdst = 1; + bit has_glc = 0; bits<1> has_offset = 1; bits<1> offset_is_imm = 0; } @@ -55,12 +56,25 @@ class SM_Real <SM_Pseudo ps> bits<7> sbase; bits<7> sdst; bits<32> offset; - bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0); + bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0); } class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> : SM_Pseudo<opName, outs, ins, asmOps, pattern> { RegisterClass BaseClass; + let mayLoad = 1; + let mayStore = 0; + let has_glc = 1; +} + +class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []> + : SM_Pseudo<opName, (outs), ins, asmOps, pattern> { + RegisterClass BaseClass; + RegisterClass SrcClass; + let mayLoad = 0; + let mayStore = 1; + let has_glc = 1; + let ScalarStore = 1; } multiclass SM_Pseudo_Loads<string opName, @@ -68,18 +82,42 @@ multiclass SM_Pseudo_Loads<string opName, RegisterClass dstClass> { def _IMM : SM_Load_Pseudo <opName, (outs dstClass:$sdst), - (ins baseClass:$sbase, i32imm:$offset), - " $sdst, $sbase, $offset", []> { + (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc), + " $sdst, $sbase, $offset$glc", []> { let offset_is_imm = 1; let BaseClass = baseClass; let PseudoInstr = opName # "_IMM"; + let has_glc = 1; } + def _SGPR : SM_Load_Pseudo <opName, (outs dstClass:$sdst), - (ins baseClass:$sbase, SReg_32:$soff), - " $sdst, $sbase, $offset", []> { + (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc), + " $sdst, $sbase, $offset$glc", []> { let BaseClass = baseClass; let PseudoInstr = opName # "_SGPR"; + let has_glc = 1; + } +} + +multiclass SM_Pseudo_Stores<string opName, + RegisterClass baseClass, + RegisterClass srcClass> { + def _IMM : SM_Store_Pseudo <opName, + (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc), + " $sdata, $sbase, $offset$glc", []> { + let offset_is_imm = 1; + let BaseClass = baseClass; + let SrcClass = srcClass; + let PseudoInstr = opName # "_IMM"; + } + + def _SGPR : SM_Store_Pseudo <opName, + (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc), + " $sdata, $sbase, $offset$glc", []> { + let BaseClass = baseClass; + let SrcClass = srcClass; + let PseudoInstr = opName # "_SGPR"; } } @@ -139,6 +177,23 @@ defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads < "s_buffer_load_dwordx16", SReg_128, SReg_512 >; +defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0>; +defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64>; +defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>; + +defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores < + "s_buffer_store_dword", SReg_128, SReg_32_XM0 +>; + +defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores < + "s_buffer_store_dwordx2", SReg_128, SReg_64 +>; + +defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores < + "s_buffer_store_dwordx4", SReg_128, SReg_128 +>; + + def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>; def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>; @@ -179,13 +234,13 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> { // 1. IMM offset def : Pat < (smrd_load (SMRDImm i64:$sbase, i32:$offset)), - (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset)) + (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0)) >; // 2. SGPR offset def : Pat < (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)), - (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset)) + (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0)) >; } @@ -210,13 +265,13 @@ defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>; // 1. Offset as an immediate def SM_LOAD_PATTERN : Pat < // name this pattern to reuse AddedComplexity on CI (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)), - (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset) + (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset, 0) >; // 2. Offset loaded in an 32bit SGPR def : Pat < (SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)), - (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset) + (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset, 0) >; } // End let AddedComplexity = 100 @@ -228,7 +283,7 @@ let Predicates = [isVI] in { // 1. Offset as 20bit DWORD immediate def : Pat < (SIload_constant v4i32:$sbase, IMM20bit:$offset), - (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset)) + (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset), 0) >; def : Pat < @@ -263,15 +318,22 @@ class SMRD_Real_si <bits<5> op, SM_Pseudo ps> let Inst{31-27} = 0x18; //encoding } +// FIXME: Assembler should reject trying to use glc on SMRD +// instructions on SI. multiclass SM_Real_Loads_si<bits<5> op, string ps, SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM), SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> { + def _IMM_si : SMRD_Real_si <op, immPs> { - let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset); + let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset, GLC:$glc); } + + // FIXME: The operand name $offset is inconsistent with $soff used + // in the pseudo def _SGPR_si : SMRD_Real_si <op, sgprPs> { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc); } + } defm S_LOAD_DWORD : SM_Real_Loads_si <0x00, "S_LOAD_DWORD">; @@ -297,6 +359,7 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps> : SM_Real<ps> , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> , Enc64 { + bit glc; let AssemblerPredicates = [isVI]; let DecoderNamespace = "VI"; @@ -304,10 +367,8 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps> let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?); let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); - // glc is only applicable to scalar stores, which are not yet - // implemented. - let Inst{16} = 0; // glc bit - let Inst{17} = imm; + let Inst{16} = !if(ps.has_glc, glc, ?); + let Inst{17} = imm; let Inst{25-18} = op; let Inst{31-26} = 0x30; //encoding let Inst{51-32} = !if(ps.has_offset, offset{19-0}, ?); @@ -317,10 +378,24 @@ multiclass SM_Real_Loads_vi<bits<8> op, string ps, SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM), SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> { def _IMM_vi : SMEM_Real_vi <op, immPs> { - let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset); + let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset, GLC:$glc); } def _SGPR_vi : SMEM_Real_vi <op, sgprPs> { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc); + } +} + +multiclass SM_Real_Stores_vi<bits<8> op, string ps, + SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM), + SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> { + // FIXME: The operand name $offset is inconsistent with $soff used + // in the pseudo + def _IMM_vi : SMEM_Real_vi <op, immPs> { + let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset:$offset, GLC:$glc); + } + + def _SGPR_vi : SMEM_Real_vi <op, sgprPs> { + let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc); } } @@ -335,6 +410,14 @@ defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_vi <0x0a, "S_BUFFER_LOAD_DWORDX4">; defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_vi <0x0b, "S_BUFFER_LOAD_DWORDX8">; defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c, "S_BUFFER_LOAD_DWORDX16">; +defm S_STORE_DWORD : SM_Real_Stores_vi <0x10, "S_STORE_DWORD">; +defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11, "S_STORE_DWORDX2">; +defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12, "S_STORE_DWORDX4">; + +defm S_BUFFER_STORE_DWORD : SM_Real_Stores_vi <0x18, "S_BUFFER_STORE_DWORD">; +defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_vi <0x19, "S_BUFFER_STORE_DWORDX2">; +defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_vi <0x1a, "S_BUFFER_STORE_DWORDX4">; + def S_DCACHE_INV_vi : SMEM_Real_vi <0x20, S_DCACHE_INV>; def S_DCACHE_WB_vi : SMEM_Real_vi <0x21, S_DCACHE_WB>; def S_DCACHE_INV_VOL_vi : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>; @@ -358,7 +441,7 @@ class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> : let AssemblerPredicates = [isCIOnly]; let DecoderNamespace = "CI"; - let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset); + let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc); let LGKM_CNT = ps.LGKM_CNT; let SMRD = ps.SMRD; @@ -410,7 +493,7 @@ let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity in { class SMRD_Pattern_ci <string Instr, ValueType vt> : Pat < (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), - (vt (!cast<SM_Pseudo>(Instr#"_IMM_ci") $sbase, $offset))> { + (vt (!cast<SM_Pseudo>(Instr#"_IMM_ci") $sbase, $offset, 0))> { let Predicates = [isCIOnly]; } @@ -422,7 +505,7 @@ def : SMRD_Pattern_ci <"S_LOAD_DWORDX16", v16i32>; def : Pat < (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)), - (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset)> { + (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset, 0)> { let Predicates = [isCI]; // should this be isCIOnly? } diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir index 124f9f519c0..234fe57b513 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir @@ -46,10 +46,10 @@ body: | %0 = COPY %sgpr2_sgpr3 %1 = COPY %vgpr2 %2 = COPY %vgpr3 - %3 = S_LOAD_DWORDX8_IMM %0, 0 - %4 = S_LOAD_DWORDX4_IMM %0, 12 - %5 = S_LOAD_DWORDX8_IMM %0, 16 - %6 = S_LOAD_DWORDX4_IMM %0, 28 + %3 = S_LOAD_DWORDX8_IMM %0, 0, 0 + %4 = S_LOAD_DWORDX4_IMM %0, 12, 0 + %5 = S_LOAD_DWORDX8_IMM %0, 16, 0 + %6 = S_LOAD_DWORDX4_IMM %0, 28, 0 undef %7.sub0 = S_MOV_B32 212739 %20 = COPY %7 %11 = COPY %20 diff --git a/llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir b/llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir index e15da0923be..a4e77f281ea 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir @@ -1,4 +1,4 @@ -# RUN: llc -march=amdgcn -mcpu=SI -run-pass none -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -run-pass none -o - %s | FileCheck %s # This test verifies that the MIR parser can parse target index operands. --- | @@ -55,15 +55,15 @@ body: | %sgpr2 = S_ADD_U32 %sgpr2, target-index(amdgpu-constdata-start), implicit-def %scc, implicit-def %scc %sgpr3 = S_ADDC_U32 %sgpr3, 0, implicit-def %scc, implicit %scc, implicit-def %scc, implicit %scc %sgpr4_sgpr5 = S_LSHR_B64 %sgpr2_sgpr3, 32, implicit-def dead %scc - %sgpr6 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 11 + %sgpr6 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 11, 0 %sgpr7 = S_ASHR_I32 %sgpr6, 31, implicit-def dead %scc %sgpr6_sgpr7 = S_LSHL_B64 %sgpr6_sgpr7, 2, implicit-def dead %scc %sgpr2 = S_ADD_U32 %sgpr2, @float_gv, implicit-def %scc %sgpr3 = S_ADDC_U32 %sgpr4, 0, implicit-def dead %scc, implicit %scc %sgpr4 = S_ADD_U32 %sgpr2, %sgpr6, implicit-def %scc %sgpr5 = S_ADDC_U32 %sgpr3, %sgpr7, implicit-def dead %scc, implicit %scc - %sgpr2 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 0 - %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 + %sgpr2 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 0, 0 + %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9, 0 %sgpr7 = S_MOV_B32 61440 %sgpr6 = S_MOV_B32 -1 %vgpr0 = V_MOV_B32_e32 killed %sgpr2, implicit %exec @@ -85,15 +85,15 @@ body: | %sgpr2 = S_ADD_U32 %sgpr2, target-index(amdgpu-constdata-start) + 1, implicit-def %scc, implicit-def %scc %sgpr3 = S_ADDC_U32 %sgpr3, 0, implicit-def %scc, implicit %scc, implicit-def %scc, implicit %scc %sgpr4_sgpr5 = S_LSHR_B64 %sgpr2_sgpr3, 32, implicit-def dead %scc - %sgpr6 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 11 + %sgpr6 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 11, 0 %sgpr7 = S_ASHR_I32 %sgpr6, 31, implicit-def dead %scc %sgpr6_sgpr7 = S_LSHL_B64 %sgpr6_sgpr7, 2, implicit-def dead %scc %sgpr2 = S_ADD_U32 %sgpr2, @float_gv, implicit-def %scc %sgpr3 = S_ADDC_U32 %sgpr4, 0, implicit-def dead %scc, implicit %scc %sgpr4 = S_ADD_U32 %sgpr2, %sgpr6, implicit-def %scc %sgpr5 = S_ADDC_U32 %sgpr3, %sgpr7, implicit-def dead %scc, implicit %scc - %sgpr2 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 0 - %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 + %sgpr2 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 0, 0 + %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9, 0 %sgpr7 = S_MOV_B32 61440 %sgpr6 = S_MOV_B32 -1 %vgpr0 = V_MOV_B32_e32 killed %sgpr2, implicit %exec diff --git a/llvm/test/MC/AMDGPU/smem.s b/llvm/test/MC/AMDGPU/smem.s index ab2dcf4b8a1..d2f10224e21 100644 --- a/llvm/test/MC/AMDGPU/smem.s +++ b/llvm/test/MC/AMDGPU/smem.s @@ -13,3 +13,27 @@ s_dcache_wb_vol s_memrealtime s[4:5] // VI: s_memrealtime s[4:5] ; encoding: [0x00,0x01,0x94,0xc0,0x00,0x00,0x00,0x00] // NOSI: error: instruction not supported on this GPU + +// FIXME: Should error about instruction on GPU +s_store_dword s1, s[2:3], 0xfc +// VI: s_store_dword s1, s[2:3], 0xfc ; encoding: [0x41,0x00,0x42,0xc0,0xfc,0x00,0x00,0x00] +// NOSI: error: instruction not supported on this GPU + +s_store_dword s1, s[2:3], 0xfc glc +// VI: s_store_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x43,0xc0,0xfc,0x00,0x00,0x00] +// NOSI: error: invalid operand for instruction + +s_store_dword s1, s[2:3], s4 +// VI: s_store_dword s1, s[2:3], s4 ; encoding: [0x41,0x00,0x40,0xc0,0x04,0x00,0x00,0x00] +// NOSI: error: instruction not supported on this GPU + +s_store_dword s1, s[2:3], s4 glc +// VI: s_store_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x41,0xc0,0x04,0x00,0x00,0x00] +// NOSI: error: invalid operand for instruction + +// FIXME: Should error on SI instead of silently ignoring glc +s_load_dword s1, s[2:3], 0xfc glc +// VI: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x03,0xc0,0xfc,0x00,0x00,0x00] + +s_load_dword s1, s[2:3], s4 glc +// VI: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x01,0xc0,0x04,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/smrd-err.s b/llvm/test/MC/AMDGPU/smrd-err.s index 0f991e4aefe..d7ef74901c6 100644 --- a/llvm/test/MC/AMDGPU/smrd-err.s +++ b/llvm/test/MC/AMDGPU/smrd-err.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s s_load_dwordx4 s[100:103], s[2:3], s4 |