summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp13
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h3
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.td22
-rw-r--r--llvm/lib/Target/AMDGPU/SMInstructions.td23
4 files changed, 42 insertions, 19 deletions
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 9fac753a50a..1a8c04b150f 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -81,8 +81,9 @@ DECODE_OPERAND(VReg_96)
DECODE_OPERAND(VReg_128)
DECODE_OPERAND(SReg_32)
-DECODE_OPERAND(SReg_32_XM0)
+DECODE_OPERAND(SReg_32_XM0_XEXEC)
DECODE_OPERAND(SReg_64)
+DECODE_OPERAND(SReg_64_XEXEC)
DECODE_OPERAND(SReg_128)
DECODE_OPERAND(SReg_256)
DECODE_OPERAND(SReg_512)
@@ -277,13 +278,17 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
return decodeSrcOp(OPW32, Val);
}
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0(unsigned Val) const {
- // SReg_32_XM0 is SReg_32 without M0
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
+ unsigned Val) const {
+ // SReg_32_XM0 is SReg_32 without M0 or EXEC_LO/EXEC_HI
return decodeOperand_SReg_32(Val);
}
MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
- // see decodeOperand_SReg_32 comment
+ return decodeSrcOp(OPW64, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
return decodeSrcOp(OPW64, Val);
}
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index f9768497734..9045b7fbd59 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -68,8 +68,9 @@ public:
MCOperand decodeOperand_VReg_128(unsigned Val) const;
MCOperand decodeOperand_SReg_32(unsigned Val) const;
- MCOperand decodeOperand_SReg_32_XM0(unsigned Val) const;
+ MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const;
MCOperand decodeOperand_SReg_64(unsigned Val) const;
+ MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const;
MCOperand decodeOperand_SReg_128(unsigned Val) const;
MCOperand decodeOperand_SReg_256(unsigned Val) const;
MCOperand decodeOperand_SReg_512(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 5bdd8bec54e..6d1155bc390 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -256,20 +256,25 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
-def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
- (add SGPR_32, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
+def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+ (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)> {
let AllocationPriority = 1;
}
+def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+ (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
+ let AllocationPriority = 1;
+}
+
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
- (add SReg_32_XM0, M0_CLASS)> {
+ (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI)> {
let AllocationPriority = 1;
- let isAllocatable = 0;
}
def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)> {
+ let CopyCost = 1;
let AllocationPriority = 2;
}
@@ -277,8 +282,15 @@ def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)>
let isAllocatable = 0;
}
+def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
+ (add SGPR_64, VCC, FLAT_SCR)> {
+ let CopyCost = 1;
+ let AllocationPriority = 2;
+}
+
def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
- (add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64, TBA, TMA)> {
+ (add SReg_64_XEXEC, EXEC, TTMP_64, TBA, TMA)> {
+ let CopyCost = 1;
let AllocationPriority = 2;
}
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index ddb695aa3e4..fcdcea64879 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -126,7 +126,7 @@ multiclass SM_Pseudo_Stores<string opName,
}
class SM_Time_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
- opName, (outs SReg_64:$sdst), (ins),
+ opName, (outs SReg_64_XEXEC:$sdst), (ins),
" $sdst", [(set i64:$sdst, (node))]> {
let hasSideEffects = 1;
// FIXME: mayStore = ? is a workaround for tablegen bug for different
@@ -155,18 +155,23 @@ class SM_Inval_Pseudo <string opName, SDPatternOperator node> : SM_Pseudo<
// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SReg_32_XM0 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.
-defm S_LOAD_DWORD : SM_Pseudo_Loads <"s_load_dword", SReg_64, SReg_32_XM0>;
-defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_load_dwordx2", SReg_64, SReg_64>;
+
+// XXX - SMEM instructions do not allow exec for data operand, but
+// does sdst for SMRD on SI/CI?
+defm S_LOAD_DWORD : SM_Pseudo_Loads <"s_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
+defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_load_dwordx2", SReg_64, SReg_64_XEXEC>;
defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_load_dwordx4", SReg_64, SReg_128>;
defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <"s_load_dwordx8", SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <"s_load_dwordx16", SReg_64, SReg_512>;
defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <
- "s_buffer_load_dword", SReg_128, SReg_32_XM0
+ "s_buffer_load_dword", SReg_128, SReg_32_XM0_XEXEC
>;
+// FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
+// SI/CI, bit disallowed for SMEM on VI.
defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <
- "s_buffer_load_dwordx2", SReg_128, SReg_64
+ "s_buffer_load_dwordx2", SReg_128, SReg_64_XEXEC
>;
defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <
@@ -181,16 +186,16 @@ defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <
"s_buffer_load_dwordx16", SReg_128, SReg_512
>;
-defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0>;
-defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64>;
+defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
+defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>;
defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>;
defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <
- "s_buffer_store_dword", SReg_128, SReg_32_XM0
+ "s_buffer_store_dword", SReg_128, SReg_32_XM0_XEXEC
>;
defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <
- "s_buffer_store_dwordx2", SReg_128, SReg_64
+ "s_buffer_store_dwordx2", SReg_128, SReg_64_XEXEC
>;
defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <
OpenPOWER on IntegriCloud