|  |  |  |
|---|---|---|
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-11-15 21:51:43 +0000 |
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-11-15 21:51:43 +0000 |
| commit | 301162c4fe0daa33b53f5e62c713cc9e42696800 | |
| tree | ab4a22800fb7c072c9c03166b20b366fe4e2c0ce /llvm/lib/Target/AMDGPU/SIISelLowering.cpp | |
| parent | afbe849d7745811b0fc7c67e07be59330f099393 | |
AMDGPU: Replace i64 add/sub lowering
Use VOP3 add/addc like usual.

This has some tradeoffs. Inline immediates fold a little better, but other constants are worse off. SIShrinkInstructions could be made smarter to handle these cases.

This allows us to avoid selecting scalar adds where we need to track the carry in scc and replace its users. This makes it easier to use the carryless VALU adds.
llvm-svn: 318340
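
The expansion this commit adds is ordinary carry-chain arithmetic: the low 32-bit add or sub produces a carry/borrow bit (scc on the scalar unit), and the high half consumes it. Below is a minimal plain-C++ sketch of those semantics with a quick check against native 64-bit arithmetic; the names are illustrative only, not LLVM or AMDGPU code:

```cpp
#include <cassert>
#include <cstdint>

struct U64Parts {
  uint32_t Lo;
  uint32_t Hi;
};

// Models S_ADD_U32 then S_ADDC_U32: the low add produces a carry
// (scc on the hardware) that the high add consumes.
U64Parts addU64ViaHalves(U64Parts A, U64Parts B) {
  uint32_t Lo = A.Lo + B.Lo;
  uint32_t Carry = Lo < A.Lo;         // low sum wrapped -> scc = 1
  uint32_t Hi = A.Hi + B.Hi + Carry;  // S_ADDC_U32 adds scc back in
  return {Lo, Hi};
}

// Models S_SUB_U32 then S_SUBB_U32: the low sub produces a borrow.
U64Parts subU64ViaHalves(U64Parts A, U64Parts B) {
  uint32_t Lo = A.Lo - B.Lo;
  uint32_t Borrow = A.Lo < B.Lo;       // low diff wrapped -> scc = 1
  uint32_t Hi = A.Hi - B.Hi - Borrow;  // S_SUBB_U32 subtracts scc
  return {Lo, Hi};
}

int main() {
  const uint64_t Samples[] = {0, 1, 0xFFFFFFFFull, 0x100000000ull,
                              0xDEADBEEFCAFEF00Dull, ~0ull};
  for (uint64_t A : Samples)
    for (uint64_t B : Samples) {
      U64Parts PA{uint32_t(A), uint32_t(A >> 32)};
      U64Parts PB{uint32_t(B), uint32_t(B >> 32)};
      U64Parts S = addU64ViaHalves(PA, PB);
      U64Parts D = subU64ViaHalves(PA, PB);
      assert(((uint64_t)S.Hi << 32 | S.Lo) == A + B);
      assert(((uint64_t)D.Hi << 32 | D.Lo) == A - B);
    }
}
```

The message's point about scc is visible here: the carry is live between the low and high instructions, so nothing may clobber scc in between, and during selection every user of that carry would have had to be tracked and replaced. Expanding the pseudo late, in the custom inserter, sidesteps that.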
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')

|  |  |  |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 52 |

1 file changed, 48 insertions, 4 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 43c4be359f4..247a594a06b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2602,8 +2602,8 @@ static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII,
   MachineBasicBlock::iterator I(&MI);
 
   unsigned DstReg = MI.getOperand(0).getReg();
-  unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-  unsigned TmpExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+  unsigned TmpExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
 
   BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), TmpExec);
 
@@ -2954,13 +2954,57 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
   }
 
   switch (MI.getOpcode()) {
-  case AMDGPU::SI_INIT_M0:
+  case AMDGPU::S_ADD_U64_PSEUDO:
+  case AMDGPU::S_SUB_U64_PSEUDO: {
+    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+    const DebugLoc &DL = MI.getDebugLoc();
+
+    MachineOperand &Dest = MI.getOperand(0);
+    MachineOperand &Src0 = MI.getOperand(1);
+    MachineOperand &Src1 = MI.getOperand(2);
+
+    unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+    unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+    MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(MI, MRI,
+      Src0, &AMDGPU::SReg_64RegClass, AMDGPU::sub0,
+      &AMDGPU::SReg_32_XM0RegClass);
+    MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(MI, MRI,
+      Src0, &AMDGPU::SReg_64RegClass, AMDGPU::sub1,
+      &AMDGPU::SReg_32_XM0RegClass);
+
+    MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(MI, MRI,
+      Src1, &AMDGPU::SReg_64RegClass, AMDGPU::sub0,
+      &AMDGPU::SReg_32_XM0RegClass);
+    MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(MI, MRI,
+      Src1, &AMDGPU::SReg_64RegClass, AMDGPU::sub1,
+      &AMDGPU::SReg_32_XM0RegClass);
+
+    bool IsAdd = (MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
+
+    unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
+    unsigned HiOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
+    BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0)
+      .add(Src0Sub0)
+      .add(Src1Sub0);
+    BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1)
+      .add(Src0Sub1)
+      .add(Src1Sub1);
+    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg())
+      .addReg(DestSub0)
+      .addImm(AMDGPU::sub0)
+      .addReg(DestSub1)
+      .addImm(AMDGPU::sub1);
+    MI.eraseFromParent();
+    return BB;
+  }
+  case AMDGPU::SI_INIT_M0: {
     BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
             TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
         .add(MI.getOperand(0));
     MI.eraseFromParent();
     return BB;
-
+  }
   case AMDGPU::SI_INIT_EXEC:
     // This should be before all vector instructions.
     BuildMI(*BB, &*BB->begin(), MI.getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
```
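
The rejoin at the end of the new case is worth a note: after the destination, REG_SEQUENCE operands come in (register, subregister-index) pairs. Here is the final BuildMI from the diff again, with comments added for this discussion:

```cpp
// Reassemble the 64-bit result from the two 32-bit halves: each
// source register is followed by the subregister slot it fills.
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg())
    .addReg(DestSub0)      // low half of the result ...
    .addImm(AMDGPU::sub0)  // ... goes into sub0
    .addReg(DestSub1)      // high half of the result ...
    .addImm(AMDGPU::sub1); // ... goes into sub1
```

Since the two halves were produced by the carry-chain pair above it, the REG_SEQUENCE result is the complete 64-bit sum or difference, and the original pseudo can then be erased.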

