author | Tom Stellard <thomas.stellard@amd.com> | 2016-09-06 20:00:26 +0000
---|---|---
committer | Tom Stellard <thomas.stellard@amd.com> | 2016-09-06 20:00:26 +0000 |
commit | 2add8a114054a7ae3acdea286d3b6542f3be8838 (patch) | |
tree | 781d5adbba2e8d276ebef1119528ddafa5b8ca21 | |
parent | 36103e868b8e1743aedcbc004ee211edbce720b1 (diff) | |
AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies
Summary:
I put this code here because I want to re-use it in a few other places.
This supersedes some of the immediate folding code we have in SIFoldOperands.
I think the peephole optimizer is probably a better place for folding
immediates into copies, since it does some register coalescing at the same time.
This will also make it easier to transition SIFoldOperands into a smarter pass,
where it looks at all uses of an instruction at once to determine the optimal
way to fold operands. Right now, the pass just considers one operand at a time.
Reviewers: arsenm
Subscribers: wdng, nhaehnle, arsenm, llvm-commits, kzhuravl
Differential Revision: https://reviews.llvm.org/D23402
llvm-svn: 280744
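To make the new COPY case concrete, here is a minimal caller-side sketch (not part of the patch: the helper name and variable bindings are hypothetical, though the FoldImmediate() signature matches the diff below, and in-tree the hook is driven by the generic peephole optimizer):

```cpp
// Hypothetical helper illustrating how the new COPY handling is meant to be
// used. Assumes Reg is a virtual register defined by DefMI (an S_MOV_B32 or
// V_MOV_B32_e32 of an immediate) and read by UseMI (a COPY).
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

static bool tryFoldImmediateIntoCopy(const SIInstrInfo &TII,
                                     MachineRegisterInfo &MRI,
                                     MachineInstr &UseMI, MachineInstr &DefMI,
                                     unsigned Reg) {
  // Before:  %vreg0 = S_MOV_B32 42
  //          %vreg1 = COPY %vreg0
  // After:   %vreg1 = S_MOV_B32 42   (or V_MOV_B32_e32 if %vreg1 is a VGPR)
  if (!TII.FoldImmediate(UseMI, DefMI, Reg, &MRI))
    return false;
  // The defining move may now be dead; a real caller would clean it up.
  if (MRI.use_nodbg_empty(Reg))
    DefMI.eraseFromParent();
  return true;
}
```

The key point is that UseMI itself is rewritten in place (setDesc() plus ChangeToImmediate()), so no new instruction has to be created for the folded immediate.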
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 29 |
-rw-r--r-- | llvm/lib/Target/AMDGPU/SOPInstructions.td | 3 |
2 files changed, 29 insertions, 3 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 56daea6e57e..58747afdb84 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1184,14 +1184,39 @@ static void removeModOperands(MachineInstr &MI) {
   MI.RemoveOperand(Src0ModIdx);
 }
 
-// TODO: Maybe this should be removed this and custom fold everything in
-// SIFoldOperands?
 bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                 unsigned Reg, MachineRegisterInfo *MRI) const {
   if (!MRI->hasOneNonDBGUse(Reg))
     return false;
 
   unsigned Opc = UseMI.getOpcode();
+  if (Opc == AMDGPU::COPY) {
+    bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
+    switch (DefMI.getOpcode()) {
+    default:
+      return false;
+    case AMDGPU::S_MOV_B64:
+      // TODO: We could fold 64-bit immediates, but this get compilicated
+      // when there are sub-registers.
+      return false;
+
+    case AMDGPU::V_MOV_B32_e32:
+    case AMDGPU::S_MOV_B32:
+      break;
+    }
+    unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
+    const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
+    assert(ImmOp);
+    // FIXME: We could handle FrameIndex values here.
+    if (!ImmOp->isImm()) {
+      return false;
+    }
+    UseMI.setDesc(get(NewOpc));
+    UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
+    UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
+    return true;
+  }
+
   if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
     // Don't fold if we are using source modifiers. The new VOP2 instructions
     // don't have them.
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 14d6daa3dfb..460bb4d2195 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -25,6 +25,7 @@ class SOP1_Pseudo <string opName, dag outs, dag ins,
   let SALU = 1;
   let SOP1 = 1;
   let SchedRW = [WriteSALU];
+  let UseNamedOperandTable = 1;
 
   string Mnemonic = opName;
   string AsmOperands = asmOps;
@@ -1100,4 +1101,4 @@ def S_GETREG_B32_vi : SOPK_Real_vi <0x11, S_GETREG_B32>;
 def S_SETREG_B32_vi : SOPK_Real_vi <0x12, S_SETREG_B32>;
 //def S_GETREG_REGRD_B32_vi : SOPK_Real_vi <0x13, S_GETREG_REGRD_B32>; // see pseudo for comments
 def S_SETREG_IMM32_B32_vi : SOPK_Real64<0x14, S_SETREG_IMM32_B32>,
-                            Select_vi<S_SETREG_IMM32_B32.Mnemonic>;
\ No newline at end of file
+                            Select_vi<S_SETREG_IMM32_B32.Mnemonic>;
```
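The SOPInstructions.td change supports the C++ change: FoldImmediate() reads the immediate through getNamedOperand(DefMI, AMDGPU::OpName::src0), and that name-based lookup only succeeds for instructions whose definitions opt into the named-operand table; S_MOV_B32 is a SOP1 pseudo, hence the added `let UseNamedOperandTable = 1`. A minimal sketch of that dependency (illustrative only, not code from the commit; the helper is hypothetical):

```cpp
// Checks whether DefMI's src0 can be found by name and is a plain immediate.
// Without UseNamedOperandTable on the defining instruction's class, the
// named-operand lookup would fail and getNamedOperand() would return nullptr.
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

static bool hasFoldableImmediate(const SIInstrInfo &TII, MachineInstr &DefMI) {
  const MachineOperand *ImmOp = TII.getNamedOperand(DefMI, AMDGPU::OpName::src0);
  return ImmOp && ImmOp->isImm();
}
```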