summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td9
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h5
-rw-r--r--llvm/lib/Target/AMDGPU/SIDefines.h3
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrFormats.td6
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.h10
-rw-r--r--llvm/lib/Target/AMDGPU/SMInstructions.td125
-rw-r--r--llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir8
-rw-r--r--llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir14
-rw-r--r--llvm/test/MC/AMDGPU/smem.s24
-rw-r--r--llvm/test/MC/AMDGPU/smrd-err.s2
11 files changed, 183 insertions, 35 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index ace0ac24e8f..90796f22b19 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -171,6 +171,12 @@ def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
"Has VGPR mode register indexing"
>;
+def FeatureScalarStores : SubtargetFeature<"scalar-stores",
+ "HasScalarStores",
+ "true",
+ "Has store scalar memory instructions"
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -311,7 +317,8 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize65536,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
- FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel
+ FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
+ FeatureScalarStores
]
>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index c6844d36479..416772199da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -105,6 +105,7 @@ protected:
bool Has16BitInsts;
bool HasMovrel;
bool HasVGPRIndexMode;
+ bool HasScalarStores;
bool FlatAddressSpace;
bool R600ALUInst;
bool CaymanISA;
@@ -527,6 +528,10 @@ public:
return getGeneration() >= VOLCANIC_ISLANDS;
}
+ bool hasScalarStores() const {
+ return HasScalarStores;
+ }
+
bool enableSIScheduler() const {
return EnableSIScheduler;
}
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 5f7e9811ffe..f48011fe5e1 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -44,7 +44,8 @@ enum {
VOPAsmPrefer32Bit = 1 << 25,
Gather4 = 1 << 26,
DisableWQM = 1 << 27,
- SOPK_ZEXT = 1 << 28
+ SOPK_ZEXT = 1 << 28,
+ SCALAR_STORE = 1 << 29
};
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 91e70650162..8976333412b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -60,6 +60,11 @@ class InstSI <dag outs, dag ins, string asm = "",
// use it as unsigned.
field bits<1> SOPKZext = 0;
+ // This is an s_store_dword* instruction that requires a cache flush
+ // on wave termination. It is necessary to distinguish from mayStore
+ // SMEM instructions like the cache flush ones.
+ field bits<1> ScalarStore = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
let TSFlags{1} = EXP_CNT;
@@ -94,6 +99,7 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{26} = Gather4;
let TSFlags{27} = DisableWQM;
let TSFlags{28} = SOPKZext;
+ let TSFlags{29} = ScalarStore;
let SchedRW = [Write32Bit];
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0acbed12766..72de6497397 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2116,6 +2116,18 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ if (isSMRD(MI)) {
+ if (MI.mayStore()) {
+ // The register offset form of scalar stores may only use m0 as the
+ // soffset register.
+ const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff);
+ if (Soff && Soff->getReg() != AMDGPU::M0) {
+ ErrInfo = "scalar stores must use m0 as offset register";
+ return false;
+ }
+ }
+ }
+
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 552a36e3585..e5d237b6279 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -428,6 +428,16 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
}
+ /// \returns true if this is an s_store_dword* instruction. This is more
+  /// specific than isSMEM && mayStore.
+ static bool isScalarStore(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
+ }
+
+ bool isScalarStore(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
+ }
+
bool isVGPRCopy(const MachineInstr &MI) const {
assert(MI.isCopy());
unsigned Dest = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index a46cbf3e624..9b9d5588ed5 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -37,6 +37,7 @@ class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
bits<1> has_sbase = 1;
bits<1> has_sdst = 1;
+ bit has_glc = 0;
bits<1> has_offset = 1;
bits<1> offset_is_imm = 0;
}
@@ -55,12 +56,25 @@ class SM_Real <SM_Pseudo ps>
bits<7> sbase;
bits<7> sdst;
bits<32> offset;
- bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0);
+ bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0);
}
class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]>
: SM_Pseudo<opName, outs, ins, asmOps, pattern> {
RegisterClass BaseClass;
+ let mayLoad = 1;
+ let mayStore = 0;
+ let has_glc = 1;
+}
+
+class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
+ : SM_Pseudo<opName, (outs), ins, asmOps, pattern> {
+ RegisterClass BaseClass;
+ RegisterClass SrcClass;
+ let mayLoad = 0;
+ let mayStore = 1;
+ let has_glc = 1;
+ let ScalarStore = 1;
}
multiclass SM_Pseudo_Loads<string opName,
@@ -68,18 +82,42 @@ multiclass SM_Pseudo_Loads<string opName,
RegisterClass dstClass> {
def _IMM : SM_Load_Pseudo <opName,
(outs dstClass:$sdst),
- (ins baseClass:$sbase, i32imm:$offset),
- " $sdst, $sbase, $offset", []> {
+ (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc),
+ " $sdst, $sbase, $offset$glc", []> {
let offset_is_imm = 1;
let BaseClass = baseClass;
let PseudoInstr = opName # "_IMM";
+ let has_glc = 1;
}
+
def _SGPR : SM_Load_Pseudo <opName,
(outs dstClass:$sdst),
- (ins baseClass:$sbase, SReg_32:$soff),
- " $sdst, $sbase, $offset", []> {
+ (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc),
+ " $sdst, $sbase, $offset$glc", []> {
let BaseClass = baseClass;
let PseudoInstr = opName # "_SGPR";
+ let has_glc = 1;
+ }
+}
+
+multiclass SM_Pseudo_Stores<string opName,
+ RegisterClass baseClass,
+ RegisterClass srcClass> {
+ def _IMM : SM_Store_Pseudo <opName,
+ (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc),
+ " $sdata, $sbase, $offset$glc", []> {
+ let offset_is_imm = 1;
+ let BaseClass = baseClass;
+ let SrcClass = srcClass;
+ let PseudoInstr = opName # "_IMM";
+ }
+
+ def _SGPR : SM_Store_Pseudo <opName,
+ (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc),
+ " $sdata, $sbase, $offset$glc", []> {
+ let BaseClass = baseClass;
+ let SrcClass = srcClass;
+ let PseudoInstr = opName # "_SGPR";
}
}
@@ -139,6 +177,23 @@ defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <
"s_buffer_load_dwordx16", SReg_128, SReg_512
>;
+defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0>;
+defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64>;
+defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>;
+
+defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <
+ "s_buffer_store_dword", SReg_128, SReg_32_XM0
+>;
+
+defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <
+ "s_buffer_store_dwordx2", SReg_128, SReg_64
+>;
+
+defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <
+ "s_buffer_store_dwordx4", SReg_128, SReg_128
+>;
+
+
def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;
@@ -179,13 +234,13 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
// 1. IMM offset
def : Pat <
(smrd_load (SMRDImm i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset))
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
>;
// 2. SGPR offset
def : Pat <
(smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset))
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
>;
}
@@ -210,13 +265,13 @@ defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
// 1. Offset as an immediate
def SM_LOAD_PATTERN : Pat < // name this pattern to reuse AddedComplexity on CI
(SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset)
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset, 0)
>;
// 2. Offset loaded in an 32bit SGPR
def : Pat <
(SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)),
- (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset)
+ (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset, 0)
>;
} // End let AddedComplexity = 100
@@ -228,7 +283,7 @@ let Predicates = [isVI] in {
// 1. Offset as 20bit DWORD immediate
def : Pat <
(SIload_constant v4i32:$sbase, IMM20bit:$offset),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset), 0)
>;
def : Pat <
@@ -263,15 +318,22 @@ class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
let Inst{31-27} = 0x18; //encoding
}
+// FIXME: Assembler should reject trying to use glc on SMRD
+// instructions on SI.
multiclass SM_Real_Loads_si<bits<5> op, string ps,
SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
+
def _IMM_si : SMRD_Real_si <op, immPs> {
- let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset);
+ let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset, GLC:$glc);
}
+
+ // FIXME: The operand name $offset is inconsistent with $soff used
+ // in the pseudo
def _SGPR_si : SMRD_Real_si <op, sgprPs> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset);
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
}
+
}
defm S_LOAD_DWORD : SM_Real_Loads_si <0x00, "S_LOAD_DWORD">;
@@ -297,6 +359,7 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
: SM_Real<ps>
, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>
, Enc64 {
+ bit glc;
let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
@@ -304,10 +367,8 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
- // glc is only applicable to scalar stores, which are not yet
- // implemented.
- let Inst{16} = 0; // glc bit
- let Inst{17} = imm;
+ let Inst{16} = !if(ps.has_glc, glc, ?);
+ let Inst{17} = imm;
let Inst{25-18} = op;
let Inst{31-26} = 0x30; //encoding
let Inst{51-32} = !if(ps.has_offset, offset{19-0}, ?);
@@ -317,10 +378,24 @@ multiclass SM_Real_Loads_vi<bits<8> op, string ps,
SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
def _IMM_vi : SMEM_Real_vi <op, immPs> {
- let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset);
+ let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset, GLC:$glc);
}
def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset);
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+ }
+}
+
+multiclass SM_Real_Stores_vi<bits<8> op, string ps,
+ SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
+ SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
+ // FIXME: The operand name $offset is inconsistent with $soff used
+ // in the pseudo
+ def _IMM_vi : SMEM_Real_vi <op, immPs> {
+ let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset:$offset, GLC:$glc);
+ }
+
+ def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
+ let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
}
}
@@ -335,6 +410,14 @@ defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_vi <0x0a, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_vi <0x0b, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c, "S_BUFFER_LOAD_DWORDX16">;
+defm S_STORE_DWORD : SM_Real_Stores_vi <0x10, "S_STORE_DWORD">;
+defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11, "S_STORE_DWORDX2">;
+defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12, "S_STORE_DWORDX4">;
+
+defm S_BUFFER_STORE_DWORD : SM_Real_Stores_vi <0x18, "S_BUFFER_STORE_DWORD">;
+defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_vi <0x19, "S_BUFFER_STORE_DWORDX2">;
+defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_vi <0x1a, "S_BUFFER_STORE_DWORDX4">;
+
def S_DCACHE_INV_vi : SMEM_Real_vi <0x20, S_DCACHE_INV>;
def S_DCACHE_WB_vi : SMEM_Real_vi <0x21, S_DCACHE_WB>;
def S_DCACHE_INV_VOL_vi : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>;
@@ -358,7 +441,7 @@ class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
let AssemblerPredicates = [isCIOnly];
let DecoderNamespace = "CI";
- let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset);
+ let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc);
let LGKM_CNT = ps.LGKM_CNT;
let SMRD = ps.SMRD;
@@ -410,7 +493,7 @@ let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity in {
class SMRD_Pattern_ci <string Instr, ValueType vt> : Pat <
(smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM_ci") $sbase, $offset))> {
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
let Predicates = [isCIOnly];
}
@@ -422,7 +505,7 @@ def : SMRD_Pattern_ci <"S_LOAD_DWORDX16", v16i32>;
def : Pat <
(SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)),
- (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset)> {
+ (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset, 0)> {
let Predicates = [isCI]; // should this be isCIOnly?
}
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
index 124f9f519c0..234fe57b513 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
@@ -46,10 +46,10 @@ body: |
%0 = COPY %sgpr2_sgpr3
%1 = COPY %vgpr2
%2 = COPY %vgpr3
- %3 = S_LOAD_DWORDX8_IMM %0, 0
- %4 = S_LOAD_DWORDX4_IMM %0, 12
- %5 = S_LOAD_DWORDX8_IMM %0, 16
- %6 = S_LOAD_DWORDX4_IMM %0, 28
+ %3 = S_LOAD_DWORDX8_IMM %0, 0, 0
+ %4 = S_LOAD_DWORDX4_IMM %0, 12, 0
+ %5 = S_LOAD_DWORDX8_IMM %0, 16, 0
+ %6 = S_LOAD_DWORDX4_IMM %0, 28, 0
undef %7.sub0 = S_MOV_B32 212739
%20 = COPY %7
%11 = COPY %20
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir b/llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir
index e15da0923be..a4e77f281ea 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/target-index-operands.mir
@@ -1,4 +1,4 @@
-# RUN: llc -march=amdgcn -mcpu=SI -run-pass none -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -run-pass none -o - %s | FileCheck %s
# This test verifies that the MIR parser can parse target index operands.
--- |
@@ -55,15 +55,15 @@ body: |
%sgpr2 = S_ADD_U32 %sgpr2, target-index(amdgpu-constdata-start), implicit-def %scc, implicit-def %scc
%sgpr3 = S_ADDC_U32 %sgpr3, 0, implicit-def %scc, implicit %scc, implicit-def %scc, implicit %scc
%sgpr4_sgpr5 = S_LSHR_B64 %sgpr2_sgpr3, 32, implicit-def dead %scc
- %sgpr6 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 11
+ %sgpr6 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 11, 0
%sgpr7 = S_ASHR_I32 %sgpr6, 31, implicit-def dead %scc
%sgpr6_sgpr7 = S_LSHL_B64 %sgpr6_sgpr7, 2, implicit-def dead %scc
%sgpr2 = S_ADD_U32 %sgpr2, @float_gv, implicit-def %scc
%sgpr3 = S_ADDC_U32 %sgpr4, 0, implicit-def dead %scc, implicit %scc
%sgpr4 = S_ADD_U32 %sgpr2, %sgpr6, implicit-def %scc
%sgpr5 = S_ADDC_U32 %sgpr3, %sgpr7, implicit-def dead %scc, implicit %scc
- %sgpr2 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 0
- %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
+ %sgpr2 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 0, 0
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9, 0
%sgpr7 = S_MOV_B32 61440
%sgpr6 = S_MOV_B32 -1
%vgpr0 = V_MOV_B32_e32 killed %sgpr2, implicit %exec
@@ -85,15 +85,15 @@ body: |
%sgpr2 = S_ADD_U32 %sgpr2, target-index(amdgpu-constdata-start) + 1, implicit-def %scc, implicit-def %scc
%sgpr3 = S_ADDC_U32 %sgpr3, 0, implicit-def %scc, implicit %scc, implicit-def %scc, implicit %scc
%sgpr4_sgpr5 = S_LSHR_B64 %sgpr2_sgpr3, 32, implicit-def dead %scc
- %sgpr6 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 11
+ %sgpr6 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 11, 0
%sgpr7 = S_ASHR_I32 %sgpr6, 31, implicit-def dead %scc
%sgpr6_sgpr7 = S_LSHL_B64 %sgpr6_sgpr7, 2, implicit-def dead %scc
%sgpr2 = S_ADD_U32 %sgpr2, @float_gv, implicit-def %scc
%sgpr3 = S_ADDC_U32 %sgpr4, 0, implicit-def dead %scc, implicit %scc
%sgpr4 = S_ADD_U32 %sgpr2, %sgpr6, implicit-def %scc
%sgpr5 = S_ADDC_U32 %sgpr3, %sgpr7, implicit-def dead %scc, implicit %scc
- %sgpr2 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 0
- %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
+ %sgpr2 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 0, 0
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9, 0
%sgpr7 = S_MOV_B32 61440
%sgpr6 = S_MOV_B32 -1
%vgpr0 = V_MOV_B32_e32 killed %sgpr2, implicit %exec
diff --git a/llvm/test/MC/AMDGPU/smem.s b/llvm/test/MC/AMDGPU/smem.s
index ab2dcf4b8a1..d2f10224e21 100644
--- a/llvm/test/MC/AMDGPU/smem.s
+++ b/llvm/test/MC/AMDGPU/smem.s
@@ -13,3 +13,27 @@ s_dcache_wb_vol
s_memrealtime s[4:5]
// VI: s_memrealtime s[4:5] ; encoding: [0x00,0x01,0x94,0xc0,0x00,0x00,0x00,0x00]
// NOSI: error: instruction not supported on this GPU
+
+// FIXME: Should error about instruction on GPU
+s_store_dword s1, s[2:3], 0xfc
+// VI: s_store_dword s1, s[2:3], 0xfc ; encoding: [0x41,0x00,0x42,0xc0,0xfc,0x00,0x00,0x00]
+// NOSI: error: instruction not supported on this GPU
+
+s_store_dword s1, s[2:3], 0xfc glc
+// VI: s_store_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x43,0xc0,0xfc,0x00,0x00,0x00]
+// NOSI: error: invalid operand for instruction
+
+s_store_dword s1, s[2:3], s4
+// VI: s_store_dword s1, s[2:3], s4 ; encoding: [0x41,0x00,0x40,0xc0,0x04,0x00,0x00,0x00]
+// NOSI: error: instruction not supported on this GPU
+
+s_store_dword s1, s[2:3], s4 glc
+// VI: s_store_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x41,0xc0,0x04,0x00,0x00,0x00]
+// NOSI: error: invalid operand for instruction
+
+// FIXME: Should error on SI instead of silently ignoring glc
+s_load_dword s1, s[2:3], 0xfc glc
+// VI: s_load_dword s1, s[2:3], 0xfc glc ; encoding: [0x41,0x00,0x03,0xc0,0xfc,0x00,0x00,0x00]
+
+s_load_dword s1, s[2:3], s4 glc
+// VI: s_load_dword s1, s[2:3], s4 glc ; encoding: [0x41,0x00,0x01,0xc0,0x04,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/smrd-err.s b/llvm/test/MC/AMDGPU/smrd-err.s
index 0f991e4aefe..d7ef74901c6 100644
--- a/llvm/test/MC/AMDGPU/smrd-err.s
+++ b/llvm/test/MC/AMDGPU/smrd-err.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s
s_load_dwordx4 s[100:103], s[2:3], s4
OpenPOWER on IntegriCloud