diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 13 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 22 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SMInstructions.td | 23 |
4 files changed, 42 insertions, 19 deletions
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 9fac753a50a..1a8c04b150f 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -81,8 +81,9 @@ DECODE_OPERAND(VReg_96) DECODE_OPERAND(VReg_128) DECODE_OPERAND(SReg_32) -DECODE_OPERAND(SReg_32_XM0) +DECODE_OPERAND(SReg_32_XM0_XEXEC) DECODE_OPERAND(SReg_64) +DECODE_OPERAND(SReg_64_XEXEC) DECODE_OPERAND(SReg_128) DECODE_OPERAND(SReg_256) DECODE_OPERAND(SReg_512) @@ -277,13 +278,17 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const { return decodeSrcOp(OPW32, Val); } -MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0(unsigned Val) const { - // SReg_32_XM0 is SReg_32 without M0 +MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC( + unsigned Val) const { + // SReg_32_XM0 is SReg_32 without M0 or EXEC_LO/EXEC_HI return decodeOperand_SReg_32(Val); } MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const { - // see decodeOperand_SReg_32 comment + return decodeSrcOp(OPW64, Val); +} + +MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const { return decodeSrcOp(OPW64, Val); } diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index f9768497734..9045b7fbd59 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -68,8 +68,9 @@ public: MCOperand decodeOperand_VReg_128(unsigned Val) const; MCOperand decodeOperand_SReg_32(unsigned Val) const; - MCOperand decodeOperand_SReg_32_XM0(unsigned Val) const; + MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const; MCOperand decodeOperand_SReg_64(unsigned Val) const; + MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const; MCOperand decodeOperand_SReg_128(unsigned Val) const; MCOperand decodeOperand_SReg_256(unsigned Val) const; MCOperand decodeOperand_SReg_512(unsigned Val) const; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 5bdd8bec54e..6d1155bc390 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -256,20 +256,25 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, // Subset of SReg_32 without M0 for SMRD instructions and alike. // See comments in SIInstructions.td for more info. -def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32, - (add SGPR_32, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI, +def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32, + (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)> { let AllocationPriority = 1; } +def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32, + (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> { + let AllocationPriority = 1; +} + // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32, - (add SReg_32_XM0, M0_CLASS)> { + (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI)> { let AllocationPriority = 1; - let isAllocatable = 0; } def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)> { + let CopyCost = 1; let AllocationPriority = 2; } @@ -277,8 +282,15 @@ def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> let isAllocatable = 0; } +def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32, + (add SGPR_64, VCC, FLAT_SCR)> { + let CopyCost = 1; + let AllocationPriority = 2; +} + def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32, - (add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64, TBA, TMA)> { + (add SReg_64_XEXEC, EXEC, TTMP_64, TBA, TMA)> { + let CopyCost = 1; let AllocationPriority = 2; } diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index ddb695aa3e4..fcdcea64879 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -126,7 +126,7 @@ multiclass SM_Pseudo_Stores<string opName, } class SM_Time_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo< - opName, (outs SReg_64:$sdst), (ins), + opName, (outs SReg_64_XEXEC:$sdst), (ins), " $sdst", [(set i64:$sdst, (node))]> { let hasSideEffects = 1; // FIXME: mayStore = ? is a workaround for tablegen bug for different @@ -155,18 +155,23 @@ class SM_Inval_Pseudo <string opName, SDPatternOperator node> : SM_Pseudo< // We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit // SMRD instructions, because the SReg_32_XM0 register class does not include M0 // and writing to M0 from an SMRD instruction will hang the GPU. -defm S_LOAD_DWORD : SM_Pseudo_Loads <"s_load_dword", SReg_64, SReg_32_XM0>; -defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_load_dwordx2", SReg_64, SReg_64>; + +// XXX - SMEM instructions do not allow exec for data operand, but +// does sdst for SMRD on SI/CI? +defm S_LOAD_DWORD : SM_Pseudo_Loads <"s_load_dword", SReg_64, SReg_32_XM0_XEXEC>; +defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_load_dwordx2", SReg_64, SReg_64_XEXEC>; defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_load_dwordx4", SReg_64, SReg_128>; defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <"s_load_dwordx8", SReg_64, SReg_256>; defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <"s_load_dwordx16", SReg_64, SReg_512>; defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads < - "s_buffer_load_dword", SReg_128, SReg_32_XM0 + "s_buffer_load_dword", SReg_128, SReg_32_XM0_XEXEC >; +// FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on +// SI/CI, bit disallowed for SMEM on VI. defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads < - "s_buffer_load_dwordx2", SReg_128, SReg_64 + "s_buffer_load_dwordx2", SReg_128, SReg_64_XEXEC >; defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads < @@ -181,16 +186,16 @@ defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads < "s_buffer_load_dwordx16", SReg_128, SReg_512 >; -defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0>; -defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64>; +defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>; +defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>; defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>; defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores < - "s_buffer_store_dword", SReg_128, SReg_32_XM0 + "s_buffer_store_dword", SReg_128, SReg_32_XM0_XEXEC >; defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores < - "s_buffer_store_dwordx2", SReg_128, SReg_64 + "s_buffer_store_dwordx2", SReg_128, SReg_64_XEXEC >; defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores < |