diff options
| author | Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com> | 2017-12-22 15:18:06 +0000 |
|---|---|---|
| committer | Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com> | 2017-12-22 15:18:06 +0000 |
| commit | 27134953183463d6626f43820b97b939d425a5de (patch) | |
| tree | 7d276e2d4252ed6c2cdb7859fc8913a4aba3259d | |
| parent | 7f880c18508a7e0937d5d533dcc02240dfd69f48 (diff) | |
| download | bcm5719-llvm-27134953183463d6626f43820b97b939d425a5de.tar.gz bcm5719-llvm-27134953183463d6626f43820b97b939d425a5de.zip | |
[AMDGPU][MC] Added support of 256- and 512-bit tuples of ttmp registers
See bug 35561: https://bugs.llvm.org/show_bug.cgi?id=35561
This patch also affects implementation of SGPR and VGPR registers though changes are cosmetic.
Reviewers: artem.tamazov, arsenm
Differential Revision: https://reviews.llvm.org/D41437
llvm-svn: 321359
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 35 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 182 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4 | ||||
| -rw-r--r-- | llvm/test/MC/AMDGPU/trap.s | 42 | ||||
| -rw-r--r-- | llvm/test/MC/Disassembler/AMDGPU/trap_gfx9.txt | 32 | ||||
| -rw-r--r-- | llvm/test/MC/Disassembler/AMDGPU/trap_vi.txt | 16 |
10 files changed, 287 insertions, 43 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index bb628b8c558..fda6252f46e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -695,18 +695,24 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( IsSGPR = false; Width = 3; } else if (AMDGPU::SReg_128RegClass.contains(Reg)) { + assert(!AMDGPU::TTMP_128RegClass.contains(Reg) && + "trap handler registers should not be used"); IsSGPR = true; Width = 4; } else if (AMDGPU::VReg_128RegClass.contains(Reg)) { IsSGPR = false; Width = 4; } else if (AMDGPU::SReg_256RegClass.contains(Reg)) { + assert(!AMDGPU::TTMP_256RegClass.contains(Reg) && + "trap handler registers should not be used"); IsSGPR = true; Width = 8; } else if (AMDGPU::VReg_256RegClass.contains(Reg)) { IsSGPR = false; Width = 8; } else if (AMDGPU::SReg_512RegClass.contains(Reg)) { + assert(!AMDGPU::TTMP_512RegClass.contains(Reg) && + "trap handler registers should not be used"); IsSGPR = true; Width = 16; } else if (AMDGPU::VReg_512RegClass.contains(Reg)) { diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 02ccfe852e1..7ecaf0bf87c 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1495,6 +1495,8 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) { case 1: return AMDGPU::TTMP_32RegClassID; case 2: return AMDGPU::TTMP_64RegClassID; case 4: return AMDGPU::TTMP_128RegClassID; + case 8: return AMDGPU::TTMP_256RegClassID; + case 16: return AMDGPU::TTMP_512RegClassID; } } else if (Is == IS_SGPR) { switch (RegWidth) { @@ -1502,8 +1504,8 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) { case 1: return AMDGPU::SGPR_32RegClassID; case 2: return AMDGPU::SGPR_64RegClassID; case 4: return AMDGPU::SGPR_128RegClassID; - case 8: return AMDGPU::SReg_256RegClassID; - case 16: return AMDGPU::SReg_512RegClassID; + case 8: return AMDGPU::SGPR_256RegClassID; + case 16: return AMDGPU::SGPR_512RegClassID; } } return -1; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 4a3f2c97517..47a2d3f2fdc 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -348,10 +348,12 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID, case AMDGPU::TTMP_128RegClassID: // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in // this bundle? - case AMDGPU::SReg_256RegClassID: - // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in + case AMDGPU::SGPR_256RegClassID: + case AMDGPU::TTMP_256RegClassID: + // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in // this bundle? - case AMDGPU::SReg_512RegClassID: + case AMDGPU::SGPR_512RegClassID: + case AMDGPU::TTMP_512RegClassID: shift = 2; break; // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in @@ -441,11 +443,11 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const { } MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const { - return createSRegOperand(AMDGPU::SReg_256RegClassID, Val); + return decodeDstOp(OPW256, Val); } MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const { - return createSRegOperand(AMDGPU::SReg_512RegClassID, Val); + return decodeDstOp(OPW512, Val); } MCOperand AMDGPUDisassembler::decodeLiteralConstant() const { @@ -593,6 +595,8 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const { return SGPR_32RegClassID; case OPW64: return SGPR_64RegClassID; case OPW128: return SGPR_128RegClassID; + case OPW256: return SGPR_256RegClassID; + case OPW512: return SGPR_512RegClassID; } } @@ -608,6 +612,8 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const { return TTMP_32RegClassID; case OPW64: return TTMP_64RegClassID; case OPW128: return TTMP_128RegClassID; + case OPW256: return TTMP_256RegClassID; + case OPW512: return TTMP_512RegClassID; } } @@ -659,6 +665,25 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c } } +MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const { + using namespace AMDGPU::EncValues; + + assert(Val < 128); + assert(Width == OPW256 || Width == OPW512); + + if (Val <= SGPR_MAX) { + assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning. + return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN); + } + + int TTmpIdx = getTTmpIdx(Val); + if (TTmpIdx >= 0) { + return createSRegOperand(getTtmpClassId(Width), TTmpIdx); + } + + llvm_unreachable("unknown dst register"); +} + MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const { using namespace AMDGPU; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index ce396eb68c4..75cfc5e1128 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -95,6 +95,8 @@ public: OPW32, OPW64, OPW128, + OPW256, + OPW512, OPW16, OPWV216, OPW_LAST_, @@ -110,6 +112,7 @@ public: MCOperand decodeLiteralConstant() const; MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const; + MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const; MCOperand decodeSpecialReg32(unsigned Val) const; MCOperand decodeSpecialReg64(unsigned Val) const; diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index 67663d39967..bf57f88bef9 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -335,13 +335,13 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo)) { O << 'v'; NumRegs = 8; - } else if (MRI.getRegClass(AMDGPU::SReg_256RegClassID).contains(RegNo)) { + } else if (MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo)) { O << 's'; NumRegs = 8; } else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo)) { O << 'v'; NumRegs = 16; - } else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(RegNo)) { + } else if (MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo)) { O << 's'; NumRegs = 16; } else { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 6b7c3ffb7bb..dd0efef7f91 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -8,6 +8,26 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// +// Helpers +//===----------------------------------------------------------------------===// + +class getSubRegs<int size> { + list<SubRegIndex> ret2 = [sub0, sub1]; + list<SubRegIndex> ret3 = [sub0, sub1, sub2]; + list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3]; + list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7]; + list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3, + sub4, sub5, sub6, sub7, + sub8, sub9, sub10, sub11, + sub12, sub13, sub14, sub15]; + + list<SubRegIndex> ret = !if(!eq(size, 2), ret2, + !if(!eq(size, 3), ret3, + !if(!eq(size, 4), ret4, + !if(!eq(size, 8), ret8, ret16)))); +} + +//===----------------------------------------------------------------------===// // Declarations that describe the SI registers //===----------------------------------------------------------------------===// class SIReg <string n, bits<16> regIdx = 0> : Register<n>, @@ -141,19 +161,19 @@ def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, } // SGPR 64-bit registers -def SGPR_64Regs : RegisterTuples<[sub0, sub1], +def SGPR_64Regs : RegisterTuples<getSubRegs<2>.ret, [(add (decimate SGPR_32, 2)), (add (decimate (shl SGPR_32, 1), 2))]>; // SGPR 128-bit registers -def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3], +def SGPR_128Regs : RegisterTuples<getSubRegs<4>.ret, [(add (decimate SGPR_32, 4)), (add (decimate (shl SGPR_32, 1), 4)), (add (decimate (shl SGPR_32, 2), 4)), (add (decimate (shl SGPR_32, 3), 4))]>; // SGPR 256-bit registers -def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], +def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret, [(add (decimate SGPR_32, 4)), (add (decimate (shl SGPR_32, 1), 4)), (add (decimate (shl SGPR_32, 2), 4)), @@ -164,8 +184,7 @@ def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], (add (decimate (shl SGPR_32, 7), 4))]>; // SGPR 512-bit registers -def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, - sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15], +def SGPR_512Regs : RegisterTuples<getSubRegs<16>.ret, [(add (decimate SGPR_32, 4)), (add (decimate (shl SGPR_32, 1), 4)), (add (decimate (shl SGPR_32, 2), 4)), @@ -190,47 +209,125 @@ def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, } // Trap handler TMP 64-bit registers -def TTMP_64Regs : RegisterTuples<[sub0, sub1], +def TTMP_64Regs : RegisterTuples<getSubRegs<2>.ret, [(add (decimate TTMP_32, 2)), (add (decimate (shl TTMP_32, 1), 2))]>; // Trap handler TMP 128-bit registers -def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3], +def TTMP_128Regs : RegisterTuples<getSubRegs<4>.ret, [(add (decimate TTMP_32, 4)), (add (decimate (shl TTMP_32, 1), 4)), (add (decimate (shl TTMP_32, 2), 4)), (add (decimate (shl TTMP_32, 3), 4))]>; -class TmpRegTuples <string tgt, - bit Is64Bit, - int Index0, - int Index1 = !add(Index0, 1), - int Index2 = !add(Index0, !if(Is64Bit, 1, 2)), - int Index3 = !add(Index0, !if(Is64Bit, 1, 3)), - string name = "ttmp["#Index0#":"#Index3#"]", - Register r0 = !cast<Register>("TTMP"#Index0#tgt), - Register r1 = !cast<Register>("TTMP"#Index1#tgt), - Register r2 = !cast<Register>("TTMP"#Index2#tgt), - Register r3 = !cast<Register>("TTMP"#Index3#tgt)> : - RegisterWithSubRegs<name, !if(Is64Bit, [r0, r1], [r0, r1, r2, r3])> { - let SubRegIndices = !if(Is64Bit, [sub0, sub1], [sub0, sub1, sub2, sub3]); - let HWEncoding = r0.HWEncoding; -} +def TTMP_256Regs : RegisterTuples<getSubRegs<8>.ret, + [(add (decimate TTMP_32, 4)), + (add (decimate (shl TTMP_32, 1), 4)), + (add (decimate (shl TTMP_32, 2), 4)), + (add (decimate (shl TTMP_32, 3), 4)), + (add (decimate (shl TTMP_32, 4), 4)), + (add (decimate (shl TTMP_32, 5), 4)), + (add (decimate (shl TTMP_32, 6), 4)), + (add (decimate (shl TTMP_32, 7), 4))]>; + +def TTMP_512Regs : RegisterTuples<getSubRegs<16>.ret, + [(add (decimate TTMP_32, 4)), + (add (decimate (shl TTMP_32, 1), 4)), + (add (decimate (shl TTMP_32, 2), 4)), + (add (decimate (shl TTMP_32, 3), 4)), + (add (decimate (shl TTMP_32, 4), 4)), + (add (decimate (shl TTMP_32, 5), 4)), + (add (decimate (shl TTMP_32, 6), 4)), + (add (decimate (shl TTMP_32, 7), 4)), + (add (decimate (shl TTMP_32, 8), 4)), + (add (decimate (shl TTMP_32, 9), 4)), + (add (decimate (shl TTMP_32, 10), 4)), + (add (decimate (shl TTMP_32, 11), 4)), + (add (decimate (shl TTMP_32, 12), 4)), + (add (decimate (shl TTMP_32, 13), 4)), + (add (decimate (shl TTMP_32, 14), 4)), + (add (decimate (shl TTMP_32, 15), 4))]>; + +class TmpRegTuplesBase<int index, int size, + list<Register> subRegs, + list<SubRegIndex> indices = getSubRegs<size>.ret, + int index1 = !add(index, !add(size, -1)), + string name = "ttmp["#index#":"#index1#"]"> : + RegisterWithSubRegs<name, subRegs> { + let HWEncoding = subRegs[0].HWEncoding; + let SubRegIndices = indices; +} + +class TmpRegTuples<string tgt, + int size, + int index0, + int index1 = !add(index0, 1), + int index2 = !add(index0, !if(!eq(size, 2), 1, 2)), + int index3 = !add(index0, !if(!eq(size, 2), 1, 3)), + int index4 = !add(index0, !if(!eq(size, 8), 4, 1)), + int index5 = !add(index0, !if(!eq(size, 8), 5, 1)), + int index6 = !add(index0, !if(!eq(size, 8), 6, 1)), + int index7 = !add(index0, !if(!eq(size, 8), 7, 1)), + Register r0 = !cast<Register>("TTMP"#index0#tgt), + Register r1 = !cast<Register>("TTMP"#index1#tgt), + Register r2 = !cast<Register>("TTMP"#index2#tgt), + Register r3 = !cast<Register>("TTMP"#index3#tgt), + Register r4 = !cast<Register>("TTMP"#index4#tgt), + Register r5 = !cast<Register>("TTMP"#index5#tgt), + Register r6 = !cast<Register>("TTMP"#index6#tgt), + Register r7 = !cast<Register>("TTMP"#index7#tgt)> : + TmpRegTuplesBase<index0, size, + !if(!eq(size, 2), [r0, r1], + !if(!eq(size, 4), [r0, r1, r2, r3], + [r0, r1, r2, r3, r4, r5, r6, r7])), + getSubRegs<size>.ret>; foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in { - def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 1, Index>; - def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 1, Index>; + def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>; + def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 2, Index>; } foreach Index = {0, 4, 8, 12} in { def TTMP#Index#_TTMP#!add(Index,1)# _TTMP#!add(Index,2)# - _TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 0, Index>; + _TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 4, Index>; def TTMP#Index#_TTMP#!add(Index,1)# _TTMP#!add(Index,2)# - _TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 0, Index>; + _TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 4, Index>; } +foreach Index = {0, 4, 8} in { + def TTMP#Index#_TTMP#!add(Index,1)# + _TTMP#!add(Index,2)# + _TTMP#!add(Index,3)# + _TTMP#!add(Index,4)# + _TTMP#!add(Index,5)# + _TTMP#!add(Index,6)# + _TTMP#!add(Index,7)#_vi : TmpRegTuples<"_vi", 8, Index>; + def TTMP#Index#_TTMP#!add(Index,1)# + _TTMP#!add(Index,2)# + _TTMP#!add(Index,3)# + _TTMP#!add(Index,4)# + _TTMP#!add(Index,5)# + _TTMP#!add(Index,6)# + _TTMP#!add(Index,7)#_gfx9 : TmpRegTuples<"_gfx9", 8, Index>; +} + +def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi : + TmpRegTuplesBase<0, 16, + [TTMP0_vi, TTMP1_vi, TTMP2_vi, TTMP3_vi, + TTMP4_vi, TTMP5_vi, TTMP6_vi, TTMP7_vi, + TTMP8_vi, TTMP9_vi, TTMP10_vi, TTMP11_vi, + TTMP12_vi, TTMP13_vi, TTMP14_vi, TTMP15_vi]>; + +def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9 : + TmpRegTuplesBase<0, 16, + [TTMP0_gfx9, TTMP1_gfx9, TTMP2_gfx9, TTMP3_gfx9, + TTMP4_gfx9, TTMP5_gfx9, TTMP6_gfx9, TTMP7_gfx9, + TTMP8_gfx9, TTMP9_gfx9, TTMP10_gfx9, TTMP11_gfx9, + TTMP12_gfx9, TTMP13_gfx9, TTMP14_gfx9, TTMP15_gfx9]>; + + // VGPR 32-bit registers // i16/f16 only on VI+ def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, @@ -240,25 +337,25 @@ def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, } // VGPR 64-bit registers -def VGPR_64 : RegisterTuples<[sub0, sub1], +def VGPR_64 : RegisterTuples<getSubRegs<2>.ret, [(add (trunc VGPR_32, 255)), (add (shl VGPR_32, 1))]>; // VGPR 96-bit registers -def VGPR_96 : RegisterTuples<[sub0, sub1, sub2], +def VGPR_96 : RegisterTuples<getSubRegs<3>.ret, [(add (trunc VGPR_32, 254)), (add (shl VGPR_32, 1)), (add (shl VGPR_32, 2))]>; // VGPR 128-bit registers -def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], +def VGPR_128 : RegisterTuples<getSubRegs<4>.ret, [(add (trunc VGPR_32, 253)), (add (shl VGPR_32, 1)), (add (shl VGPR_32, 2)), (add (shl VGPR_32, 3))]>; // VGPR 256-bit registers -def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], +def VGPR_256 : RegisterTuples<getSubRegs<8>.ret, [(add (trunc VGPR_32, 249)), (add (shl VGPR_32, 1)), (add (shl VGPR_32, 2)), @@ -269,8 +366,7 @@ def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], (add (shl VGPR_32, 7))]>; // VGPR 512-bit registers -def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, - sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15], +def VGPR_512 : RegisterTuples<getSubRegs<16>.ret, [(add (trunc VGPR_32, 241)), (add (shl VGPR_32, 1)), (add (shl VGPR_32, 2)), @@ -368,13 +464,31 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, } // End CopyCost = 2 -def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> { +def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> { + let AllocationPriority = 11; +} + +def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> { + let isAllocatable = 0; +} + +def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, + (add SGPR_256, TTMP_256)> { // Requires 4 s_mov_b64 to copy let CopyCost = 4; let AllocationPriority = 11; } -def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512)> { +def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512Regs)> { + let AllocationPriority = 12; +} + +def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add TTMP_512Regs)> { + let isAllocatable = 0; +} + +def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, + (add SGPR_512, TTMP_512)> { // Requires 8 s_mov_b64 to copy let CopyCost = 8; let AllocationPriority = 12; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 819a7add0be..125a3b22d0c 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -667,6 +667,10 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) { CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \ CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \ CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \ + CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \ + CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ + CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ + CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ } #define CASE_CI_VI(node) \ diff --git a/llvm/test/MC/AMDGPU/trap.s b/llvm/test/MC/AMDGPU/trap.s index de551ca00b5..7b527ba3072 100644 --- a/llvm/test/MC/AMDGPU/trap.s +++ b/llvm/test/MC/AMDGPU/trap.s @@ -190,6 +190,48 @@ s_mov_b64 ttmp[14:15], exec // GFX9: s_mov_b64 ttmp[14:15], exec ; encoding: [0x7e,0x01,0xfa,0xbe] //===----------------------------------------------------------------------===// +// Trap Handler related - 8-dword registers +// NB: gfx7 doc states that SMRD does not support trap registers for dst +//===----------------------------------------------------------------------===// + +s_buffer_load_dwordx8 ttmp[0:7], s[0:3], s0 +// VI: [0x00,0x1c,0x2c,0xc0,0x00,0x00,0x00,0x00] +// GFX9: [0x00,0x1b,0x2c,0xc0,0x00,0x00,0x00,0x00] + +s_buffer_load_dwordx8 ttmp[4:11], s[0:3], s0 +// VI: [0x00,0x1d,0x2c,0xc0,0x00,0x00,0x00,0x00] +// GFX9: [0x00,0x1c,0x2c,0xc0,0x00,0x00,0x00,0x00] + +s_buffer_load_dwordx8 ttmp[8:15], s[0:3], s0 +// NOSICIVI: error: not a valid operand +// GFX9: [0x00,0x1d,0x2c,0xc0,0x00,0x00,0x00,0x00] + +s_load_dwordx8 ttmp[0:7], s[0:1], s0 +// VI: [0x00,0x1c,0x0c,0xc0,0x00,0x00,0x00,0x00] +// GFX9: [0x00,0x1b,0x0c,0xc0,0x00,0x00,0x00,0x00] + +s_load_dwordx8 ttmp[4:11], s[0:1], s0 +// VI: [0x00,0x1d,0x0c,0xc0,0x00,0x00,0x00,0x00] +// GFX9: [0x00,0x1c,0x0c,0xc0,0x00,0x00,0x00,0x00] + +s_load_dwordx8 ttmp[8:15], s[0:1], s0 +// NOSICIVI: error: not a valid operand +// GFX9: [0x00,0x1d,0x0c,0xc0,0x00,0x00,0x00,0x00] + +//===----------------------------------------------------------------------===// +// Trap Handler related - 16-dword registers +// NB: gfx7 doc states that SMRD does not support trap registers for dst +//===----------------------------------------------------------------------===// + +s_buffer_load_dwordx16 ttmp[0:15], s[0:3], s0 +// NOSICIVI: error: not a valid operand +// GFX9: [0x00,0x1b,0x30,0xc0,0x00,0x00,0x00,0x00] + +s_load_dwordx16 ttmp[0:15], s[0:1], s0 +// NOSICIVI: error: not a valid operand +// GFX9: [0x00,0x1b,0x10,0xc0,0x00,0x00,0x00,0x00] + +//===----------------------------------------------------------------------===// // Trap Handler related - Some specific instructions //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/Disassembler/AMDGPU/trap_gfx9.txt b/llvm/test/MC/Disassembler/AMDGPU/trap_gfx9.txt index 70a570eec5a..0b140c42168 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/trap_gfx9.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/trap_gfx9.txt @@ -107,3 +107,35 @@ # GFX9: buffer_atomic_inc v1, off, ttmp[12:15], 56 glc ; encoding: [0x00,0x40,0x2c,0xe1,0x00,0x01,0x1e,0xb8] 0x00,0x40,0x2c,0xe1,0x00,0x01,0x1e,0xb8 + +#===----------------------------------------------------------------------===# +# Trap Handler related - 8-dword registers +#===----------------------------------------------------------------------===# + +# GFX9: s_buffer_load_dwordx8 ttmp[0:7], s[0:3], s0 ; encoding: [0x00,0x1b,0x2c,0xc0,0x00,0x00,0x00,0x00] +0x00,0x1b,0x2c,0xc0,0x00,0x00,0x00,0x00 + +# GFX9: s_buffer_load_dwordx8 ttmp[4:11], s[0:3], s0 ; encoding: [0x00,0x1c,0x2c,0xc0,0x00,0x00,0x00,0x00] +0x00,0x1c,0x2c,0xc0,0x00,0x00,0x00,0x00 + +# GFX9: s_buffer_load_dwordx8 ttmp[8:15], s[0:3], s0 ; encoding: [0x00,0x1d,0x2c,0xc0,0x00,0x00,0x00,0x00] +0x00,0x1d,0x2c,0xc0,0x00,0x00,0x00,0x00 + +# GFX9: s_load_dwordx8 ttmp[0:7], s[0:1], s0 ; encoding: [0x00,0x1b,0x0c,0xc0,0x00,0x00,0x00,0x00] +0x00,0x1b,0x0c,0xc0,0x00,0x00,0x00,0x00 + +# GFX9: s_load_dwordx8 ttmp[4:11], s[0:1], s0 ; encoding: [0x00,0x1c,0x0c,0xc0,0x00,0x00,0x00,0x00] +0x00,0x1c,0x0c,0xc0,0x00,0x00,0x00,0x00 + +# GFX9: s_load_dwordx8 ttmp[8:15], s[0:1], s0 ; encoding: [0x00,0x1d,0x0c,0xc0,0x00,0x00,0x00,0x00] +0x00,0x1d,0x0c,0xc0,0x00,0x00,0x00,0x00 + +#===----------------------------------------------------------------------===# +# Trap Handler related - 16-dword registers +#===----------------------------------------------------------------------===# + +# GFX9: s_buffer_load_dwordx16 ttmp[0:15], s[0:3], s0 ; encoding: [0x00,0x1b,0x30,0xc0,0x00,0x00,0x00,0x00] +0x00,0x1b,0x30,0xc0,0x00,0x00,0x00,0x00 + +# GFX9: s_load_dwordx16 ttmp[0:15], s[0:1], s0 ; encoding: [0x00,0x1b,0x10,0xc0,0x00,0x00,0x00,0x00] +0x00,0x1b,0x10,0xc0,0x00,0x00,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/trap_vi.txt b/llvm/test/MC/Disassembler/AMDGPU/trap_vi.txt index 8b131512050..eb254134cc5 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/trap_vi.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/trap_vi.txt @@ -107,3 +107,19 @@ # VI: buffer_atomic_inc v1, off, ttmp[8:11], 56 glc ; encoding: [0x00,0x40,0x2c,0xe1,0x00,0x01,0x1e,0xb8] 0x00,0x40,0x2c,0xe1,0x00,0x01,0x1e,0xb8 + +#===----------------------------------------------------------------------===# +# Trap Handler related - 8-dword registers +#===----------------------------------------------------------------------===# + +# VI: s_buffer_load_dwordx8 ttmp[0:7], s[0:3], s0 ; encoding: [0x00,0x1c,0x2c,0xc0,0x00,0x00,0x00,0x00] +0x00,0x1c,0x2c,0xc0,0x00,0x00,0x00,0x00 + +# VI: s_buffer_load_dwordx8 ttmp[4:11], s[0:3], s0 ; encoding: [0x00,0x1d,0x2c,0xc0,0x00,0x00,0x00,0x00] +0x00,0x1d,0x2c,0xc0,0x00,0x00,0x00,0x00 + +# VI: s_load_dwordx8 ttmp[0:7], s[0:1], s0 ; encoding: [0x00,0x1c,0x0c,0xc0,0x00,0x00,0x00,0x00] +0x00,0x1c,0x0c,0xc0,0x00,0x00,0x00,0x00 + +# VI: s_load_dwordx8 ttmp[4:11], s[0:1], s0 ; encoding: [0x00,0x1d,0x0c,0xc0,0x00,0x00,0x00,0x00] +0x00,0x1d,0x0c,0xc0,0x00,0x00,0x00,0x00 |

