summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp30
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h4
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp15
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.td189
4 files changed, 99 insertions, 139 deletions
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index b4d759ebea1..a4516254397 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -292,35 +292,7 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
}
#endif
- unsigned AltName = AMDGPU::NoRegAltName;
-
- if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(RegNo))
- AltName = AMDGPU::Reg64;
- else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(RegNo))
- AltName = AMDGPU::Reg128;
- else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::SReg_96RegClassID).contains(RegNo))
- AltName = AMDGPU::Reg96;
- else if (MRI.getRegClass(AMDGPU::VReg_160RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::SReg_160RegClassID).contains(RegNo))
- AltName = AMDGPU::Reg160;
- else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo))
- AltName = AMDGPU::Reg256;
- else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(RegNo))
- AltName = AMDGPU::Reg512;
- else if (MRI.getRegClass(AMDGPU::VReg_1024RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::SReg_1024RegClassID).contains(RegNo) ||
- MRI.getRegClass(AMDGPU::AReg_1024RegClassID).contains(RegNo))
- AltName = AMDGPU::Reg1024;
-
- O << getRegisterName(RegNo, AltName);
+ O << getRegisterName(RegNo);
}
void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index b544d1ef360..0f62f039763 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -12,7 +12,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H
-#include "AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
@@ -26,8 +25,7 @@ public:
//Autogenerated by tblgen
void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo,
- unsigned AltIdx = AMDGPU::NoRegAltName);
+ static const char *getRegisterName(unsigned RegNo);
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 1be2d4480b5..652569d5caf 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1347,20 +1347,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
- const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg);
- unsigned Size = getRegSizeInBits(*RC);
- unsigned AltName = AMDGPU::NoRegAltName;
-
- switch (Size) {
- case 64: AltName = AMDGPU::Reg64; break;
- case 96: AltName = AMDGPU::Reg96; break;
- case 128: AltName = AMDGPU::Reg128; break;
- case 160: AltName = AMDGPU::Reg160; break;
- case 256: AltName = AMDGPU::Reg256; break;
- case 512: AltName = AMDGPU::Reg512; break;
- case 1024: AltName = AMDGPU::Reg1024; break;
- }
- return AMDGPUInstPrinter::getRegisterName(Reg, AltName);
+ return AMDGPUInstPrinter::getRegisterName(Reg);
}
// FIXME: This is very slow. It might be worth creating a map from physreg to
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 19e569586ec..2fbc2cbe939 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -37,48 +37,31 @@ class getSubRegs<int size> {
!if(!eq(size, 16), ret16, ret32))))));
}
-let Namespace = "AMDGPU" in {
-defset list<RegAltNameIndex> AllRegAltNameIndices = {
- def Reg64 : RegAltNameIndex;
- def Reg96 : RegAltNameIndex;
- def Reg128 : RegAltNameIndex;
- def Reg160 : RegAltNameIndex;
- def Reg256 : RegAltNameIndex;
- def Reg512 : RegAltNameIndex;
- def Reg1024 : RegAltNameIndex;
-}
+// Generates a list of sequential register tuple names.
+// E.g. RegSeq<3,2,2,"s">.ret -> [ "s[0:1]", "s[2:3]" ]
+class RegSeq<int last_reg, int stride, int size, string prefix, int start = 0> {
+ int next = !add(start, stride);
+ int end_reg = !add(!add(start, size), -1);
+ list<string> ret =
+ !if(!le(end_reg, last_reg),
+ !listconcat([prefix # "[" # start # ":" # end_reg # "]"],
+ RegSeq<last_reg, stride, size, prefix, next>.ret),
+ []);
}
//===----------------------------------------------------------------------===//
// Declarations that describe the SI registers
//===----------------------------------------------------------------------===//
-class SIReg <string n, bits<16> regIdx = 0, string prefix = "",
- int regNo = !cast<int>(regIdx)> :
- Register<n, !if(!eq(prefix, ""),
- [ n, n, n, n, n, n, n ],
- [ prefix # "[" # regNo # ":" # !and(!add(regNo, 1), 255) # "]",
- prefix # "[" # regNo # ":" # !and(!add(regNo, 2), 255) # "]",
- prefix # "[" # regNo # ":" # !and(!add(regNo, 3), 255) # "]",
- prefix # "[" # regNo # ":" # !and(!add(regNo, 4), 255) # "]",
- prefix # "[" # regNo # ":" # !and(!add(regNo, 7), 255) # "]",
- prefix # "[" # regNo # ":" # !and(!add(regNo, 15), 255) # "]",
- prefix # "[" # regNo # ":" # !and(!add(regNo, 31), 255) # "]",
- ])>,
+class SIReg <string n, bits<16> regIdx = 0> :
+ Register<n>,
DwarfRegNum<[!cast<int>(HWEncoding)]> {
let Namespace = "AMDGPU";
- let RegAltNameIndices = AllRegAltNameIndices;
// This is not yet the complete register encoding. An additional
// bit is set for VGPRs.
let HWEncoding = regIdx;
}
-class SIRegisterWithSubRegs<string n, list<Register> subregs> :
- RegisterWithSubRegs<n, subregs> {
- let RegAltNameIndices = AllRegAltNameIndices;
- let AltNames = [ n, n, n, n, n, n, n ];
-}
-
// Special Registers
def VCC_LO : SIReg<"vcc_lo", 106>;
def VCC_HI : SIReg<"vcc_hi", 107>;
@@ -91,7 +74,7 @@ def SP_REG : SIReg<"sp", 0>;
def SCRATCH_WAVE_OFFSET_REG : SIReg<"scratch_wave_offset", 0>;
// VCC for 64-bit instructions
-def VCC : SIRegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
+def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
DwarfRegAlias<VCC_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -101,7 +84,7 @@ def VCC : SIRegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
def EXEC_LO : SIReg<"exec_lo", 126>;
def EXEC_HI : SIReg<"exec_hi", 127>;
-def EXEC : SIRegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>,
+def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>,
DwarfRegAlias<EXEC_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -132,7 +115,7 @@ def LDS_DIRECT : SIReg <"src_lds_direct", 254>;
def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;
-def XNACK_MASK : SIRegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
+def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
DwarfRegAlias<XNACK_MASK_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -143,7 +126,7 @@ def XNACK_MASK : SIRegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_
def TBA_LO : SIReg<"tba_lo", 108>;
def TBA_HI : SIReg<"tba_hi", 109>;
-def TBA : SIRegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
+def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
DwarfRegAlias<TBA_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -153,7 +136,7 @@ def TBA : SIRegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
def TMA_LO : SIReg<"tma_lo", 110>;
def TMA_HI : SIReg<"tma_hi", 111>;
-def TMA : SIRegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
+def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
DwarfRegAlias<TMA_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -173,7 +156,7 @@ multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
}
class FlatReg <Register lo, Register hi, bits<16> encoding> :
- SIRegisterWithSubRegs<"flat_scratch", [lo, hi]>,
+ RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
DwarfRegAlias<lo> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -189,19 +172,19 @@ def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
// SGPR registers
foreach Index = 0-105 in {
- def SGPR#Index : SIReg <"s"#Index, Index, "s">;
+ def SGPR#Index : SIReg <"s"#Index, Index>;
}
// VGPR registers
foreach Index = 0-255 in {
- def VGPR#Index : SIReg <"v"#Index, Index, "v"> {
+ def VGPR#Index : SIReg <"v"#Index, Index> {
let HWEncoding{8} = 1;
}
}
// AccVGPR registers
foreach Index = 0-255 in {
- def AGPR#Index : SIReg <"a"#Index, Index, "a"> {
+ def AGPR#Index : SIReg <"a"#Index, Index> {
let HWEncoding{8} = 1;
}
}
@@ -233,20 +216,23 @@ def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
// SGPR 64-bit registers
def SGPR_64Regs : RegisterTuples<getSubRegs<2>.ret,
[(add (decimate SGPR_32, 2)),
- (add (decimate (shl SGPR_32, 1), 2))]>;
+ (add (decimate (shl SGPR_32, 1), 2))],
+ RegSeq<105, 2, 2, "s">.ret>;
// SGPR 96-bit registers. No operations use these, but for symmetry with 96-bit VGPRs.
def SGPR_96Regs : RegisterTuples<getSubRegs<3>.ret,
[(add (decimate SGPR_32, 3)),
(add (decimate (shl SGPR_32, 1), 3)),
- (add (decimate (shl SGPR_32, 2), 3))]>;
+ (add (decimate (shl SGPR_32, 2), 3))],
+ RegSeq<105, 3, 3, "s">.ret>;
// SGPR 128-bit registers
def SGPR_128Regs : RegisterTuples<getSubRegs<4>.ret,
[(add (decimate SGPR_32, 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
- (add (decimate (shl SGPR_32, 3), 4))]>;
+ (add (decimate (shl SGPR_32, 3), 4))],
+ RegSeq<105, 4, 4, "s">.ret>;
// SGPR 160-bit registers. No operations use these, but for symmetry with 160-bit VGPRs.
def SGPR_160Regs : RegisterTuples<getSubRegs<5>.ret,
@@ -254,7 +240,8 @@ def SGPR_160Regs : RegisterTuples<getSubRegs<5>.ret,
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (shl SGPR_32, 3), 4)),
- (add (decimate (shl SGPR_32, 4), 4))]>;
+ (add (decimate (shl SGPR_32, 4), 4))],
+ RegSeq<105, 4, 5, "s">.ret>;
// SGPR 256-bit registers
def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret,
@@ -265,7 +252,8 @@ def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret,
(add (decimate (shl SGPR_32, 4), 4)),
(add (decimate (shl SGPR_32, 5), 4)),
(add (decimate (shl SGPR_32, 6), 4)),
- (add (decimate (shl SGPR_32, 7), 4))]>;
+ (add (decimate (shl SGPR_32, 7), 4))],
+ RegSeq<105, 4, 8, "s">.ret>;
// SGPR 512-bit registers
def SGPR_512Regs : RegisterTuples<getSubRegs<16>.ret,
@@ -284,7 +272,8 @@ def SGPR_512Regs : RegisterTuples<getSubRegs<16>.ret,
(add (decimate (shl SGPR_32, 12), 4)),
(add (decimate (shl SGPR_32, 13), 4)),
(add (decimate (shl SGPR_32, 14), 4)),
- (add (decimate (shl SGPR_32, 15), 4))]>;
+ (add (decimate (shl SGPR_32, 15), 4))],
+ RegSeq<105, 4, 16, "s">.ret>;
// SGPR 1024-bit registers
def SGPR_1024Regs : RegisterTuples<getSubRegs<32>.ret,
@@ -319,7 +308,8 @@ def SGPR_1024Regs : RegisterTuples<getSubRegs<32>.ret,
(add (decimate (shl SGPR_32, 28), 4)),
(add (decimate (shl SGPR_32, 29), 4)),
(add (decimate (shl SGPR_32, 30), 4)),
- (add (decimate (shl SGPR_32, 31), 4))]>;
+ (add (decimate (shl SGPR_32, 31), 4))],
+ RegSeq<105, 4, 32, "s">.ret>;
// Trap handler TMP 32-bit registers
def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
@@ -330,14 +320,16 @@ def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
// Trap handler TMP 64-bit registers
def TTMP_64Regs : RegisterTuples<getSubRegs<2>.ret,
[(add (decimate TTMP_32, 2)),
- (add (decimate (shl TTMP_32, 1), 2))]>;
+ (add (decimate (shl TTMP_32, 1), 2))],
+ RegSeq<15, 2, 2, "ttmp">.ret>;
// Trap handler TMP 128-bit registers
def TTMP_128Regs : RegisterTuples<getSubRegs<4>.ret,
[(add (decimate TTMP_32, 4)),
(add (decimate (shl TTMP_32, 1), 4)),
(add (decimate (shl TTMP_32, 2), 4)),
- (add (decimate (shl TTMP_32, 3), 4))]>;
+ (add (decimate (shl TTMP_32, 3), 4))],
+ RegSeq<15, 4, 4, "ttmp">.ret>;
def TTMP_256Regs : RegisterTuples<getSubRegs<8>.ret,
[(add (decimate TTMP_32, 4)),
@@ -347,7 +339,8 @@ def TTMP_256Regs : RegisterTuples<getSubRegs<8>.ret,
(add (decimate (shl TTMP_32, 4), 4)),
(add (decimate (shl TTMP_32, 5), 4)),
(add (decimate (shl TTMP_32, 6), 4)),
- (add (decimate (shl TTMP_32, 7), 4))]>;
+ (add (decimate (shl TTMP_32, 7), 4))],
+ RegSeq<15, 4, 8, "ttmp">.ret>;
def TTMP_512Regs : RegisterTuples<getSubRegs<16>.ret,
[(add (decimate TTMP_32, 4)),
@@ -365,14 +358,15 @@ def TTMP_512Regs : RegisterTuples<getSubRegs<16>.ret,
(add (decimate (shl TTMP_32, 12), 4)),
(add (decimate (shl TTMP_32, 13), 4)),
(add (decimate (shl TTMP_32, 14), 4)),
- (add (decimate (shl TTMP_32, 15), 4))]>;
+ (add (decimate (shl TTMP_32, 15), 4))],
+ RegSeq<15, 4, 16, "ttmp">.ret>;
class TmpRegTuplesBase<int index, int size,
list<Register> subRegs,
list<SubRegIndex> indices = getSubRegs<size>.ret,
int index1 = !add(index, !add(size, -1)),
string name = "ttmp["#index#":"#index1#"]"> :
- SIRegisterWithSubRegs<name, subRegs> {
+ RegisterWithSubRegs<name, subRegs> {
let HWEncoding = subRegs[0].HWEncoding;
let SubRegIndices = indices;
}
@@ -457,20 +451,23 @@ def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
// VGPR 64-bit registers
def VGPR_64 : RegisterTuples<getSubRegs<2>.ret,
[(add (trunc VGPR_32, 255)),
- (add (shl VGPR_32, 1))]>;
+ (add (shl VGPR_32, 1))],
+ RegSeq<255, 1, 2, "v">.ret>;
// VGPR 96-bit registers
def VGPR_96 : RegisterTuples<getSubRegs<3>.ret,
[(add (trunc VGPR_32, 254)),
(add (shl VGPR_32, 1)),
- (add (shl VGPR_32, 2))]>;
+ (add (shl VGPR_32, 2))],
+ RegSeq<255, 1, 3, "v">.ret>;
// VGPR 128-bit registers
def VGPR_128 : RegisterTuples<getSubRegs<4>.ret,
[(add (trunc VGPR_32, 253)),
(add (shl VGPR_32, 1)),
(add (shl VGPR_32, 2)),
- (add (shl VGPR_32, 3))]>;
+ (add (shl VGPR_32, 3))],
+ RegSeq<255, 1, 4, "v">.ret>;
// VGPR 160-bit registers
def VGPR_160 : RegisterTuples<getSubRegs<5>.ret,
@@ -478,7 +475,8 @@ def VGPR_160 : RegisterTuples<getSubRegs<5>.ret,
(add (shl VGPR_32, 1)),
(add (shl VGPR_32, 2)),
(add (shl VGPR_32, 3)),
- (add (shl VGPR_32, 4))]>;
+ (add (shl VGPR_32, 4))],
+ RegSeq<255, 1, 5, "v">.ret>;
// VGPR 256-bit registers
def VGPR_256 : RegisterTuples<getSubRegs<8>.ret,
@@ -489,7 +487,8 @@ def VGPR_256 : RegisterTuples<getSubRegs<8>.ret,
(add (shl VGPR_32, 4)),
(add (shl VGPR_32, 5)),
(add (shl VGPR_32, 6)),
- (add (shl VGPR_32, 7))]>;
+ (add (shl VGPR_32, 7))],
+ RegSeq<255, 1, 8, "v">.ret>;
// VGPR 512-bit registers
def VGPR_512 : RegisterTuples<getSubRegs<16>.ret,
@@ -508,7 +507,8 @@ def VGPR_512 : RegisterTuples<getSubRegs<16>.ret,
(add (shl VGPR_32, 12)),
(add (shl VGPR_32, 13)),
(add (shl VGPR_32, 14)),
- (add (shl VGPR_32, 15))]>;
+ (add (shl VGPR_32, 15))],
+ RegSeq<255, 1, 16, "v">.ret>;
// VGPR 1024-bit registers
def VGPR_1024 : RegisterTuples<getSubRegs<32>.ret,
@@ -543,7 +543,8 @@ def VGPR_1024 : RegisterTuples<getSubRegs<32>.ret,
(add (shl VGPR_32, 28)),
(add (shl VGPR_32, 29)),
(add (shl VGPR_32, 30)),
- (add (shl VGPR_32, 31))]>;
+ (add (shl VGPR_32, 31))],
+ RegSeq<255, 1, 32, "v">.ret>;
// AccVGPR 32-bit registers
def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
@@ -555,14 +556,16 @@ def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
// AGPR 64-bit registers
def AGPR_64 : RegisterTuples<getSubRegs<2>.ret,
[(add (trunc AGPR_32, 255)),
- (add (shl AGPR_32, 1))]>;
+ (add (shl AGPR_32, 1))],
+ RegSeq<255, 1, 2, "a">.ret>;
// AGPR 128-bit registers
def AGPR_128 : RegisterTuples<getSubRegs<4>.ret,
[(add (trunc AGPR_32, 253)),
(add (shl AGPR_32, 1)),
(add (shl AGPR_32, 2)),
- (add (shl AGPR_32, 3))]>;
+ (add (shl AGPR_32, 3))],
+ RegSeq<255, 1, 4, "a">.ret>;
// AGPR 512-bit registers
def AGPR_512 : RegisterTuples<getSubRegs<16>.ret,
@@ -581,7 +584,8 @@ def AGPR_512 : RegisterTuples<getSubRegs<16>.ret,
(add (shl AGPR_32, 12)),
(add (shl AGPR_32, 13)),
(add (shl AGPR_32, 14)),
- (add (shl AGPR_32, 15))]>;
+ (add (shl AGPR_32, 15))],
+ RegSeq<255, 1, 16, "a">.ret>;
// AGPR 1024-bit registers
def AGPR_1024 : RegisterTuples<getSubRegs<32>.ret,
@@ -616,7 +620,8 @@ def AGPR_1024 : RegisterTuples<getSubRegs<32>.ret,
(add (shl AGPR_32, 28)),
(add (shl AGPR_32, 29)),
(add (shl AGPR_32, 30)),
- (add (shl AGPR_32, 31))]>;
+ (add (shl AGPR_32, 31))],
+ RegSeq<255, 1, 32, "a">.ret>;
//===----------------------------------------------------------------------===//
// Register classes used as source and destination
@@ -629,7 +634,7 @@ def Pseudo_SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16],
}
def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
- (add PRIVATE_RSRC_REG), Reg128> {
+ (add PRIVATE_RSRC_REG)> {
let isAllocatable = 0;
let CopyCost = -1;
}
@@ -672,14 +677,14 @@ def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1
}
def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
- (add SGPR_64Regs), Reg64> {
+ (add SGPR_64Regs)> {
let CopyCost = 1;
let AllocationPriority = 11;
}
// CCR (call clobbered registers) SGPR 64-bit registers
def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
- (add (trunc SGPR_64, 16)), Reg64> {
+ (add (trunc SGPR_64, 16))> {
let CopyCost = SGPR_64.CopyCost;
let AllocationPriority = SGPR_64.AllocationPriority;
}
@@ -690,13 +695,13 @@ def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
}
def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
- (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA), Reg64> {
+ (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
let CopyCost = 1;
let AllocationPriority = 13;
}
def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
- (add SReg_64_XEXEC, EXEC), Reg64> {
+ (add SReg_64_XEXEC, EXEC)> {
let CopyCost = 1;
let AllocationPriority = 13;
}
@@ -719,17 +724,17 @@ let CopyCost = 2 in {
// There are no 3-component scalar instructions, but this is needed
// for symmetry with VGPRs.
def SGPR_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32,
- (add SGPR_96Regs), Reg96> {
+ (add SGPR_96Regs)> {
let AllocationPriority = 14;
}
def SReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32,
- (add SGPR_96), Reg96> {
+ (add SGPR_96)> {
let AllocationPriority = 14;
}
def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32,
- (add SGPR_128Regs), Reg128> {
+ (add SGPR_128Regs)> {
let AllocationPriority = 15;
}
@@ -739,7 +744,7 @@ def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32,
}
def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
- (add SGPR_128, TTMP_128), Reg128> {
+ (add SGPR_128, TTMP_128)> {
let AllocationPriority = 15;
}
@@ -748,17 +753,16 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
// There are no 5-component scalar instructions, but this is needed
// for symmetry with VGPRs.
def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
- (add SGPR_160Regs), Reg160> {
+ (add SGPR_160Regs)> {
let AllocationPriority = 16;
}
def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
- (add SGPR_160), Reg160> {
+ (add SGPR_160)> {
let AllocationPriority = 16;
}
-def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs),
- Reg256> {
+def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> {
let AllocationPriority = 17;
}
@@ -767,14 +771,14 @@ def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> {
}
def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
- (add SGPR_256, TTMP_256), Reg256> {
+ (add SGPR_256, TTMP_256)> {
// Requires 4 s_mov_b64 to copy
let CopyCost = 4;
let AllocationPriority = 17;
}
def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
- (add SGPR_512Regs), Reg512> {
+ (add SGPR_512Regs)> {
let AllocationPriority = 18;
}
@@ -784,7 +788,7 @@ def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
}
def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
- (add SGPR_512, TTMP_512), Reg512> {
+ (add SGPR_512, TTMP_512)> {
// Requires 8 s_mov_b64 to copy
let CopyCost = 8;
let AllocationPriority = 18;
@@ -796,19 +800,19 @@ def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 3
}
def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
- (add SGPR_1024Regs), Reg1024> {
+ (add SGPR_1024Regs)> {
let AllocationPriority = 19;
}
def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
- (add SGPR_1024), Reg1024> {
+ (add SGPR_1024)> {
let CopyCost = 16;
let AllocationPriority = 19;
}
// Register class for all vector registers (VGPRs + Interpolation Registers)
def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32,
- (add VGPR_64), Reg64> {
+ (add VGPR_64)> {
let Size = 64;
// Requires 2 v_mov_b32 to copy
@@ -816,7 +820,7 @@ def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32
let AllocationPriority = 2;
}
-def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96), Reg96> {
+def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96)> {
let Size = 96;
// Requires 3 v_mov_b32 to copy
@@ -825,7 +829,7 @@ def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96), Reg96>
}
def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
- (add VGPR_128), Reg128> {
+ (add VGPR_128)> {
let Size = 128;
// Requires 4 v_mov_b32 to copy
@@ -834,7 +838,7 @@ def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
}
def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
- (add VGPR_160), Reg160> {
+ (add VGPR_160)> {
let Size = 160;
// Requires 5 v_mov_b32 to copy
@@ -843,28 +847,28 @@ def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
}
def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
- (add VGPR_256), Reg256> {
+ (add VGPR_256)> {
let Size = 256;
let CopyCost = 8;
let AllocationPriority = 6;
}
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
- (add VGPR_512), Reg512> {
+ (add VGPR_512)> {
let Size = 512;
let CopyCost = 16;
let AllocationPriority = 7;
}
def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
- (add VGPR_1024), Reg1024> {
+ (add VGPR_1024)> {
let Size = 1024;
let CopyCost = 32;
let AllocationPriority = 8;
}
def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32,
- (add AGPR_64), Reg64> {
+ (add AGPR_64)> {
let Size = 64;
let CopyCost = 5;
@@ -872,7 +876,7 @@ def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32
}
def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
- (add AGPR_128), Reg128> {
+ (add AGPR_128)> {
let Size = 128;
// Requires 4 v_accvgpr_write and 4 v_accvgpr_read to copy + burn 1 vgpr
@@ -881,14 +885,14 @@ def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
}
def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
- (add AGPR_512), Reg512> {
+ (add AGPR_512)> {
let Size = 512;
let CopyCost = 33;
let AllocationPriority = 7;
}
def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
- (add AGPR_1024), Reg1024> {
+ (add AGPR_1024)> {
let Size = 1024;
let CopyCost = 65;
let AllocationPriority = 8;
@@ -903,8 +907,7 @@ def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
let isAllocatable = 0;
}
-def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64),
- Reg64> {
+def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
let isAllocatable = 0;
}
@@ -914,7 +917,7 @@ def AV_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
}
def AV_64 : RegisterClass<"AMDGPU", [i64, f64, v4f16], 32,
- (add AReg_64, VReg_64), Reg64> {
+ (add AReg_64, VReg_64)> {
let isAllocatable = 0;
}
OpenPOWER on IntegriCloud