diff options
Diffstat (limited to 'llvm/lib/Target')
6 files changed, 39 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index af85e64db20..72b0c5afc72 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -611,13 +611,14 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) { default: return -1; case 1: return AMDGPU::TTMP_32RegClassID; case 2: return AMDGPU::TTMP_64RegClassID; + case 4: return AMDGPU::TTMP_128RegClassID; } } else if (Is == IS_SGPR) { switch (RegWidth) { default: return -1; case 1: return AMDGPU::SGPR_32RegClassID; case 2: return AMDGPU::SGPR_64RegClassID; - case 4: return AMDGPU::SReg_128RegClassID; + case 4: return AMDGPU::SGPR_128RegClassID; case 8: return AMDGPU::SReg_256RegClassID; case 16: return AMDGPU::SReg_512RegClassID; } diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 2990b570f53..bbec73fda91 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -68,6 +68,7 @@ DECODE_OPERAND(VReg_128) DECODE_OPERAND(SGPR_32) DECODE_OPERAND(SReg_32) +DECODE_OPERAND(SReg_32_XM0) DECODE_OPERAND(SReg_64) DECODE_OPERAND(SReg_128) DECODE_OPERAND(SReg_256) @@ -248,6 +249,11 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const { return decodeSrcOp(OP32, Val); } +MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0(unsigned Val) const { + // SReg_32_XM0 is SReg_32 without M0 + return decodeOperand_SReg_32(Val); +} + MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const { // see decodeOperand_SReg_32 comment return decodeSrcOp(OP64, Val); diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index f1ba30e7bf5..680ed3068a1 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -64,6 +64,7 @@ namespace llvm { MCOperand decodeOperand_SGPR_32(unsigned Val) const; MCOperand decodeOperand_SReg_32(unsigned Val) const; + MCOperand decodeOperand_SReg_32_XM0(unsigned Val) const; MCOperand decodeOperand_SReg_64(unsigned Val) const; MCOperand decodeOperand_SReg_128(unsigned Val) const; MCOperand decodeOperand_SReg_256(unsigned Val) const; diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index 1ea8c77be69..03252db84b7 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -240,9 +240,12 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O, } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(reg)) { Type = "v"; NumRegs = 4; - } else if (MRI.getRegClass(AMDGPU::SReg_128RegClassID).contains(reg)) { + } else if (MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(reg)) { Type = "s"; NumRegs = 4; + } else if (MRI.getRegClass(AMDGPU::TTMP_128RegClassID).contains(reg)) { + Type = "ttmp"; + NumRegs = 4; } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(reg)) { Type = "v"; NumRegs = 3; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 8b730107241..dfd6eb61e1a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -60,17 +60,17 @@ defm EXP : EXP_m; // SMRD Instructions //===----------------------------------------------------------------------===// -// We are using the SGPR_32 and not the SReg_32 register class for 32-bit -// SMRD instructions, because the SGPR_32 register class does not include M0 +// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit +// SMRD instructions, because the SReg_32_XM0 register class does not include M0 // and writing to M0 from an SMRD instruction will hang the GPU. -defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SGPR_32>; +defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SReg_32_XM0>; defm S_LOAD_DWORDX2 : SMRD_Helper <smrd<0x01>, "s_load_dwordx2", SReg_64, SReg_64>; defm S_LOAD_DWORDX4 : SMRD_Helper <smrd<0x02>, "s_load_dwordx4", SReg_64, SReg_128>; defm S_LOAD_DWORDX8 : SMRD_Helper <smrd<0x03>, "s_load_dwordx8", SReg_64, SReg_256>; defm S_LOAD_DWORDX16 : SMRD_Helper <smrd<0x04>, "s_load_dwordx16", SReg_64, SReg_512>; defm S_BUFFER_LOAD_DWORD : SMRD_Helper < - smrd<0x08>, "s_buffer_load_dword", SReg_128, SGPR_32 + smrd<0x08>, "s_buffer_load_dword", SReg_128, SReg_32_XM0 >; defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper < @@ -2087,9 +2087,9 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> { } // It's unclear whether you can use M0 as the output of v_readlane_b32 -// instructions, so use SGPR_32 register class for spills to prevent +// instructions, so use SReg_32_XM0 register class for spills to prevent // this from happening. -defm SI_SPILL_S32 : SI_SPILL_SGPR <SGPR_32>; +defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32_XM0>; defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>; defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>; defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>; @@ -3431,7 +3431,7 @@ def : ZExt_i64_i1_Pat<anyext>; def : Pat < (i64 (sext i32:$src)), (REG_SEQUENCE SReg_64, $src, sub0, - (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SGPR_32)), sub1) + (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SReg_32_XM0)), sub1) >; def : Pat < diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 384b7617b25..6c6fa3c5f9a 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -132,7 +132,7 @@ def SGPR_64Regs : RegisterTuples<[sub0, sub1], (add (decimate (shl SGPR_32, 1), 2))]>; // SGPR 128-bit registers -def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], +def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3], [(add (decimate SGPR_32, 4)), (add (decimate (shl SGPR_32, 1), 4)), (add (decimate (shl SGPR_32, 2), 4)), @@ -255,6 +255,13 @@ def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI) >; +// Subset of SReg_32 without M0 for SMRD instructions and alike. +// See comments in SIInstructions.td for more info. +def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32], 32, + (add SGPR_32, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI, + TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI) +>; + def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>; def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> { @@ -265,11 +272,19 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32, (add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64, TBA, TMA) >; -def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)> { - // Requires 2 s_mov_b64 to copy - let CopyCost = 2; +// Requires 2 s_mov_b64 to copy +let CopyCost = 2 in { + +def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)>; + +def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> { + let isAllocatable = 0; } +def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)>; + +} // End CopyCost = 2 + def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> { // Requires 4 s_mov_b64 to copy let CopyCost = 4; |