summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2019-10-07 18:43:31 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2019-10-07 18:43:31 +0000
commitb4cbf9862c415ab68725443c39c374573c7932e7 (patch)
tree33842f3db79db925789d84232598a0d9fd86ff28 /llvm/lib/Target/AMDGPU
parent27269054d2df505f576eb3992d3f815c455ac7bb (diff)
downloadbcm5719-llvm-b4cbf9862c415ab68725443c39c374573c7932e7.tar.gz
bcm5719-llvm-b4cbf9862c415ab68725443c39c374573c7932e7.zip
AMDGPU/GlobalISel: Select more G_INSERT cases
At minimum, handle the s64 insert type, which is emitted in real cases during legalization. We really need TableGen to emit something like the inverse of composeSubRegIndices to determine the subreg index to use. llvm-svn: 373938
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp98
1 files changed, 78 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 056e1049461..28ebbd9101c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -555,39 +555,97 @@ bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
return false;
}
+// Map an insert of \p Size bits at bit offset \p Offset to a sub-register
+// index. Returns AMDGPU::NoSubRegister for any size/offset combination that is
+// not listed below. Callers are expected to pass an \p Offset that is a
+// multiple of 32.
+//
+// FIXME: TableGen should generate something to make this manageable for all
+// register classes. At a minimum we could use the opposite of
+// composeSubRegIndices and go up from the base 32-bit subreg.
+static unsigned getSubRegForSizeAndOffset(const SIRegisterInfo &TRI,
+                                          unsigned Size, unsigned Offset) {
+  switch (Size) {
+  case 32:
+    return TRI.getSubRegFromChannel(Offset / 32);
+  case 64: {
+    // A 64-bit value starting at channel N = Offset / 32 occupies channels N
+    // and N + 1, so the index must be subN_sub(N+1).
+    switch (Offset) {
+    case 0:
+      return AMDGPU::sub0_sub1;
+    case 32:
+      return AMDGPU::sub1_sub2;
+    case 64:
+      return AMDGPU::sub2_sub3;
+    case 96:
+      return AMDGPU::sub3_sub4;
+    case 128:
+      return AMDGPU::sub4_sub5;
+    case 160:
+      return AMDGPU::sub5_sub6;
+    // FIXME: Missing cases up to 1024 bits
+    default:
+      return AMDGPU::NoSubRegister;
+    }
+  }
+  case 96: {
+    // A 96-bit value starting at channel N spans subN_sub(N+1)_sub(N+2).
+    switch (Offset) {
+    case 0:
+      return AMDGPU::sub0_sub1_sub2;
+    case 32:
+      return AMDGPU::sub1_sub2_sub3;
+    case 64:
+      return AMDGPU::sub2_sub3_sub4;
+    // Explicit default instead of silently falling through into the outer
+    // default label.
+    default:
+      return AMDGPU::NoSubRegister;
+    }
+  }
+  default:
+    return AMDGPU::NoSubRegister;
+  }
+}
+
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
+
+  // Operands read below: 0 = result, 1 = value inserted into, 2 = value being
+  // inserted, 3 = bit offset of the insertion (immediate).
+  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);
-  if (Src1Ty.getSizeInBits() != 32)
-    return false;
+
+  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
+  unsigned InsSize = Src1Ty.getSizeInBits();
  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0)
    return false;
-  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32);
-  const DebugLoc &DL = I.getDebugLoc();
+  unsigned SubReg = getSubRegForSizeAndOffset(TRI, InsSize, Offset);
+  if (SubReg == AMDGPU::NoSubRegister)
+    return false;
+
+  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
+  const TargetRegisterClass *DstRC =
+      TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI);
+  if (!DstRC)
+    return false;
-  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
-                          .addDef(I.getOperand(0).getReg())
-                          .addReg(Src0Reg)
-                          .addReg(Src1Reg)
-                          .addImm(SubReg);
+  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
+  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
+  const TargetRegisterClass *Src0RC =
+      TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank, *MRI);
+  const TargetRegisterClass *Src1RC =
+      TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank, *MRI);
+  // The lookup can fail (DstRC is checked for the same reason above); bail out
+  // before either class is passed on or dereferenced.
+  if (!Src0RC || !Src1RC)
+    return false;
+
+  // Deal with weird cases where the class only partially supports the subreg
+  // index.
+  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
+  if (!Src0RC)
+    return false;
-  for (const MachineOperand &MO : Ins->operands()) {
-    if (!MO.isReg())
-      continue;
-    if (Register::isPhysicalRegister(MO.getReg()))
-      continue;
+  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
+      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
+      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
+    return false;
+
+  const DebugLoc &DL = I.getDebugLoc();
+  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
+      .addReg(Src0Reg)
+      .addReg(Src1Reg)
+      .addImm(SubReg);
-    const TargetRegisterClass *RC =
-        TRI.getConstrainedRegClassForOperand(MO, *MRI);
-    if (!RC)
-      continue;
-    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
-  }
  I.eraseFromParent();
  return true;
}
OpenPOWER on IntegriCloud